diff --git a/README.rst b/README.rst
index 1db37f5b3e8937095d209680e9ad58274d9a3bee..eeb564dba3fbf9e791c9d909dba4f5b770e5362d 100644
--- a/README.rst
+++ b/README.rst
@@ -11,4 +11,27 @@ be selected on the online cluster by:
 `module load exfel exfel_anaconda3`
 
 before launching the jupyter-notebook or on max-jhub by selecting the 'xfel'
-kernel instead of the 'Python 3' anaconda environement maintained by DESY.
\ No newline at end of file
+kernel instead of the 'Python 3' anaconda environment maintained by DESY.
+
+Installation
+############
+
+As long as the ToolBox is not yet added to the exfel_anaconda3 environment, it needs to be installed locally.
+
+Activate the environment mentioned above and check the installation of the scs_toolbox:
+
+.. code:: bash
+
+    pip show toolbox_scs
+
+If the toolbox has previously been installed in your home directory, everything is set up. Otherwise it needs to be installed (only once). In that case, enter the following command in the directory where the *setup.py* script is located:
+
+.. code:: bash
+
+    pip install --user .
+
+If you intend to develop code in the toolbox, use the ``-e`` flag for installation. This creates a symbolic link to the source code you are working on:
+
+.. code:: bash
+
+    pip install --user -e .
\ No newline at end of file
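A quick way to confirm that either install variant worked (a minimal sketch, assuming the exfel_anaconda3 environment is active and one of the ``pip install --user`` commands above has already been run):

.. code:: python

    # import the freshly installed package and show where it lives;
    # with the -e flag this path points into the source checkout
    import toolbox_scs as tb
    print(tb.__file__)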
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..50b3c80a115e4b6e47ba3769a0aa45ca706601ab
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,10 @@
+``Getting started``
+~~~~~~~~~~~~~~~~~~~
+
+
+``How to``
+~~~~~~~~~~
+
+
+``Contribute``
+~~~~~~~~~~~~~~
diff --git a/index.rst b/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..0f838a4bbc0251459ec0fd733121920988fef92d
--- /dev/null
+++ b/index.rst
@@ -0,0 +1,13 @@
+The SCS Toolbox
+===============
+
+.. toctree::
+   :maxdepth: 2
+
+   doc/index.rst
+
+
+Module index
+============
+
+*to be done*
diff --git a/src/toolbox_scs/__init__.py b/src/toolbox_scs/__init__.py
index 7fe650ee017808738dfe21839723b2c2ff74fc9e..5b2cc722cbcb7e2c89bc3d3699f47bbaf081869e 100644
--- a/src/toolbox_scs/__init__.py
+++ b/src/toolbox_scs/__init__.py
@@ -1,5 +1,5 @@
 from .load import (load, concatenateRuns, load_scan_variable,
-                   run_by_proposal, run_by_path)
+                   load_run, run_by_path)
 
 from .constants import mnemonics
 
@@ -8,7 +8,7 @@ __all__ = (
    "load",
    "concatenateRuns",
    "load_scan_variable",
-    "run_by_proposal",
+    "load_run",
    "run_by_path",
    # Classes
    # Variables
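Since ``load_run`` replaces ``run_by_proposal`` in the package namespace, user code now reads as in this sketch (the proposal/run pair is borrowed from the test suite; any valid pair works):

.. code:: python

    import toolbox_scs as tb

    # keyword arguments are passed through to extra_data.open_run,
    # e.g. include='*DA*' to restrict which files are opened
    run = tb.load_run(2212, 235)
    print(run.all_sources)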
diff --git a/src/toolbox_scs/detectors/dssc.py b/src/toolbox_scs/detectors/dssc.py
index 601f01ca1f446a720d29d066b271398f143266d9..1bb38f13ebe9cd536dc9617f1457b30e21d9b0bf 100644
--- a/src/toolbox_scs/detectors/dssc.py
+++ b/src/toolbox_scs/detectors/dssc.py
@@ -21,7 +21,6 @@ import xarray as xr
 import pandas as pd
 import extra_data as ed
 
-from ..load import run_by_proposal as _open_run
 
 log = logging.getLogger(__name__)
 
@@ -45,7 +44,7 @@ def load_dssc_info(proposal, run_nr):
         {'dims': tuple, 'frames_per_train': int, 'total_frames': int}
     """
-    module = _open_run(proposal, run_nr, include='*DSSC00*')
+    module = ed.open_run(proposal, run_nr, include='*DSSC00*')
     info = module.detector_info('SCS_DET_DSSC1M-1/DET/0CH0:xtdf')
     log.debug("Fetched information for DSSC module nr. 0.")
     return info
@@ -81,6 +80,7 @@ def calc_xgm_frame_indices(nbunches, framepattern):
         if 'dark' not in p:
             frame_indices.append(np.arange(i, frame_max, n_frames))
 
+    log.debug("Constructed coordinate array for XGM data.")
     return np.sort(np.concatenate(frame_indices))
 
@@ -118,14 +118,27 @@ def prepare_module_empty(scan_variable, framepattern):
         module_data[name] = empty.copy()
         module_data['sum_count_' + name] = empty_sum_count.copy()
 
+    log.debug("Prepared empty data array for a single DSSC module.")
     return module_data
 
 
 def load_chunk_data(sel, sourcename, maxframes=None):
-    '''Load DSSC data (sel is a DataCollection or a subset of a DataCollection
-    obtained by its select_trains() method). The flattened multi-index
-    (trains+pulses) is unraveled before returning the data.
-    '''
+    """
+    Load selected DSSC data. The flattened multi-index (trains+pulses) is
+    unraveled before returning the data.
+
+    Parameters
+    ----------
+    sel: extra_data.DataCollection
+        a DataCollection or a subset of a DataCollection obtained by its
+        select_trains() method
+    sourcename: str
+        detector source name, e.g. 'SCS_DET_DSSC1M-1/DET/0CH0:xtdf'
+    maxframes: int
+        maximum number of pulses per train to load
+
+    Returns
+    -------
+    xarray.DataArray
+        DSSC image data with dimensions (trainId, pulse, x, y)
+    """
+
     info = sel.detector_info(sourcename)
     fpt = info['frames_per_train']
     frames_total = info['total_frames']
@@ -142,21 +155,42 @@ def load_chunk_data(sel, sourcename, maxframes=None):
         dict(trainId_pulse=midx)
     ).unstack('trainId_pulse')
     data = data.transpose('trainId', 'pulse', 'x', 'y')
+
     return data.loc[{'pulse': np.s_[:maxframes]}]
 
 
 def merge_chunk_data(module_data, chunk_data, framepattern):
-    '''Merge chunk data with prepared dataset for entire module.
+    """
+    Merge chunk data with prepared dataset for entire module.
     Aligns on "scan_variable" and sums values for variables
     ['pumped', 'unpumped', 'sum_count']
     Concatenates the data along a new dimension ('tmp') and uses
-    the sum() method for automatic dtype conversion'''
+    the sum() method for automatic dtype conversion.
+
+    Parameters
+    ----------
+    module_data: xarray.Dataset
+        module data array to be filled
+    chunk_data: xarray.Dataset
+        loaded chunk of data to be merged into module_data
+    framepattern: list of strings
+        example: ['pumped', 'unpumped', 'sum_count']
+
+    Returns
+    -------
+    module_data: xarray.Dataset
+        merged module data
+    """
+
     where = dict(scan_variable=chunk_data.scan_variable)
     for name in framepattern:
         for prefix in ['', 'sum_count_']:
             var = prefix + name
-            summed = xr.concat([module_data[var].loc[where], chunk_data[var]], dim='tmp').sum('tmp')
+            summed = xr.concat([module_data[var].loc[where], chunk_data[var]],
+                               dim='tmp').sum('tmp')
             module_data[var].loc[where] = summed
+
+    log.debug("Merged chunked data.")
     return module_data
 
@@ -164,13 +198,25 @@ def split_frames(data, pattern, prefix=''):
     """
     Split frames according to "pattern" (possibly repeating) and average over
     resulting splits.
-    "pattern" is a list of frame names (order matters!). Examples:
-    pattern = ['pumped', 'pumped_dark', 'unpumped', 'unpumped_dark'] # 4
-    DSSC frames, 2 FEL pulses
-    pattern = ['pumped', 'unpumped'] # 2 FEL frames, no intermediate darks
-    pattern = ['image'] # no splitting, average over all frames
-    Returns a dataset with data variables named prefix + framename
+
+    Parameters
+    ----------
+    data: xarray.DataArray
+        frame data to be split and averaged
+    pattern: list of strings
+        A list of frame names (order matters!). Examples:
+        # 4 DSSC frames, 2 FEL pulses
+        pattern = ['pumped', 'pumped_dark', 'unpumped', 'unpumped_dark']
+        # 2 FEL frames, no intermediate darks
+        pattern = ['pumped', 'unpumped']
+        # no splitting, average over all frames
+        pattern = ['image']
+
+    Returns
+    -------
+    dataset: xarray.Dataset
+        a dataset with data variables named prefix + framename
     """
+
     n = len(pattern)
     dataset = xr.Dataset()
     for i, name in enumerate(pattern):
@@ -203,16 +249,19 @@ def process_intra_train(job):
     dims = ['pulse', 'x', 'y']
     coords = {'pulse': np.arange(fpt, dtype=int)}
     shape = [fpt, 128, 512]
-    module_data = xr.DataArray(np.zeros(shape, dtype=float), dims=dims, coords=coords)
+    module_data = xr.DataArray(np.zeros(shape, dtype=float), dims=dims,
+                               coords=coords)
     module_data = module_data.to_dataset(name='image')
-    module_data['sum_count'] = xr.DataArray(np.zeros(fpt, dtype=int), dims=['pulse'])
+    module_data['sum_count'] = xr.DataArray(np.zeros(fpt, dtype=int),
+                                            dims=['pulse'])
     ntrains = len(collection.train_ids)
     chunks = np.arange(ntrains, step=chunksize)
     if module == 15:
         pbar = tqdm(total=len(chunks))
     for start_index in chunks:
-        sel = collection.select_trains(kd.by_index[start_index:start_index + chunksize])
+        sel = collection.select_trains(
+            kd.by_index[start_index:start_index + chunksize])
         data = load_chunk_data(sel, sourcename, maxframes)
         data = data.to_dataset(name='image')
@@ -220,8 +269,10 @@ def process_intra_train(job):
         data = data.sum('trainId')
 
         for var in ['image', 'sum_count']:
-            # concatenating and using the sum() method automatically takes care of dtype casting if necessary
-            module_data[var] = xr.concat([module_data[var], data[var]], dim='tmp').sum('tmp')
+            # concatenating and using the sum() method automatically takes care
+            # of dtype casting if necessary
+            module_data[var] = xr.concat([module_data[var], data[var]],
+                                         dim='tmp').sum('tmp')
         if module == 15:
             pbar.update(1)
@@ -266,6 +317,7 @@ def process_dssc_module(job):
 
     """
+    log.info(f"processing dssc module {job['module']}: start")
     proposal = job['proposal']
     run_nr = job['run_nr']
     module = job['module']
@@ -300,6 +352,8 @@ def process_dssc_module(job):
             ed.by_index[start_index:start_index + chunksize])
         nframes = sel.detector_info(sourcename)['total_frames']
         if nframes > 0:  # some chunks have no DSSC data at all
+            log.debug(f"Module {module}: "
+                      f"load trains {start_index}:{start_index + chunksize}")
             data = load_chunk_data(sel, sourcename)
             sum_count = xr.full_like(data[..., 0, 0], fill_value=1)
             if pulsemask is not None:
@@ -314,10 +368,16 @@ def process_dssc_module(job):
             # aligns on trainId, drops non-matching trains
             data['scan_variable'] = scan
             data = data.groupby('scan_variable').sum('trainId')
+
+            log.debug(f"Module {module}: "
+                      f"merge trains {start_index}:{start_index + chunksize}")
             module_data = merge_chunk_data(module_data, data, framepattern)
+
         if module == 15:
             pbar.update(1)
 
     for name in framepattern:
         module_data[name] = module_data[name] / module_data['sum_count_' + name]
+
+    log.info(f"processing dssc module {module}: done")
     return module_data
\ No newline at end of file
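The comment in ``process_intra_train`` about automatic dtype casting refers to a general xarray behaviour; a standalone sketch with toy arrays (not DSSC data):

.. code:: python

    import numpy as np
    import xarray as xr

    counts = xr.DataArray(np.zeros(4, dtype=np.uint16), dims=['pulse'])
    chunk = xr.DataArray(np.full(4, 70000, dtype=np.int64), dims=['pulse'])

    # concatenating along a throwaway dimension and summing over it lets
    # xarray promote the result dtype instead of overflowing the uint16 array
    total = xr.concat([counts, chunk], dim='tmp').sum('tmp')
    print(total.dtype)  # int64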
""" + import os import logging import numpy as np import xarray as xr -from extra_data import by_index, RunDirectory, open_run + +import extra_data as ed from extra_data.read_machinery import find_proposal from .misc.bunch_pattern import extractBunchPattern @@ -21,32 +24,60 @@ from .util.exceptions import * log = logging.getLogger(__name__) -def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=False, - subset=by_index[:], rois={}, useBPTable=True): - """ Load a run and extract the data. Output is an xarray with aligned trainIds - - Inputs: - fields: list of mnemonic strings to load specific data such as "fastccd", "SCS_XGM", - or dictionnaries defining a custom mnemonic such as - {"extra": {'SCS_CDIFFT_MAG/SUPPLY/CURRENT', 'actual_current.value', None}} - runNB: (str, int) run number as integer - proposalNB: (str, int) of the proposal number e.g. 'p002252' or 2252 - subFolder: (str) sub-folder from which to load the data. Use 'raw' for raw - data or 'proc' for processed data. - display: (bool) whether to show the run.info or not - validate: (bool) whether to run extra-data-validate or not - subset: a subset of train that can be load with by_index[:5] for the - first 5 trains - rois: a dictionnary of mnemonics with a list of rois definition and the desired - names, for example {'fastccd':{'ref':{'roi':by_index[730:890, 535:720], - 'dim': ['ref_x', 'ref_y']}, 'sam':{'roi':by_index[1050:1210, 535:720], - 'dim': ['sam_x', 'sam_y']}}} - useBPTable: If True, uses the raw bunch pattern table to extract sase pulse - number and indices in the trains. If false, load the data from BUNCH_DECODER - middle layer device. - - Outputs: - res: an xarray DataSet with aligned trainIds +def load(fields, runNB, proposalNB, + subFolder='raw', + display=False, + validate=False, + subset=ed.by_index[:], + rois={}, + useBPTable=True): + """ + Load a run and extract the data. Output is an xarray with aligned + trainIds + + Parameters + ---------- + + fields: list of dictionaries + list of mnemonic strings to load specific data such as "fastccd", + "SCS_XGM", or dictionnaries defining a custom mnemonic such as + {"extra": + {'SCS_CDIFFT_MAG/SUPPLY/CURRENT', + 'actual_current.value', None}} + runNB: (str, int) + run number as integer + proposalNB: (str, int) + of the proposal number e.g. 'p002252' or 2252 + subFolder: (str) + sub-folder from which to load the data. Use 'raw' for raw data + or 'proc' for processed data. + display: (bool) + whether to show the run.info or not + validate: (bool) + whether to run extra-data-validate or not + subset: + a subset of train that can be load with by_index[:5] for the first 5 + trains + rois: dictionary + a dictionnary of mnemonics with a list of rois definition and + the desired names, for example: + {'fastccd': + {'ref': + {'roi': by_index[730:890, 535:720], + 'dim': ['ref_x', 'ref_y']}, + 'sam': + {'roi':by_index[1050:1210, 535:720], + 'dim': ['sam_x', 'sam_y']}}} + + useBPTable: boolean + If True, uses the raw bunch pattern table to extract sase pulse number + and indices in the trains. If false, load the data from BUNCH_DECODER + middle layer device. 
@@ -54,7 +85,7 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
     if isinstance(proposalNB,int):
         proposalNB = 'p{:06d}'.format(proposalNB)
     runFolder = os.path.join(find_proposal(proposalNB), subFolder, runNB)
-    run = RunDirectory(runFolder).select_trains(subset)
+    run = ed.RunDirectory(runFolder).select_trains(subset)
 
     if validate:
         get_ipython().system('extra-data-validate ' + runFolder)
@@ -86,7 +117,8 @@
         if type(f) == dict:
             # extracting mnemomic defined on the spot
             if len(f.keys()) > 1:
-                print('Loading only one "on-the-spot" mnemonic at a time, skipping all others !')
+                print('Loading only one "on-the-spot" mnemonic at a time, '
+                      'skipping all others!')
             k = list(f.keys())[0]
             v = f[k]
         else:
@@ -110,12 +142,15 @@
         if k not in rois:
             # no ROIs selection, we read everything
-            vals.append(run.get_array(v['source'], v['key'], extra_dims=v['dim']))
+            vals.append(run.get_array(v['source'], v['key'],
+                                      extra_dims=v['dim']))
             keys.append(k)
         else:
-            # ROIs selection, for each ROI we select a region of the data and save it with new name and dimensions
+            # ROIs selection, for each ROI we select a region of the data and
+            # save it with new name and dimensions
             for nk,nv in rois[k].items():
-                vals.append(run.get_array(v['source'], v['key'], extra_dims=nv['dim'], roi=nv['roi']))
+                vals.append(run.get_array(v['source'], v['key'],
+                                          extra_dims=nv['dim'], roi=nv['roi']))
                 keys.append(nk)
 
     aligned_vals = xr.align(*vals, join='inner')
@@ -126,12 +161,12 @@
     return result
 
 
-def run_by_proposal(proposal, run, **kwargs):
+def load_run(proposal, run, **kwargs):
     """
     Get run in given proposal
 
-    Wraps the extra_data open_run routine, to ease its use for the
-    scs-toolbox user.
+    Wraps the extra_data open_run routine for the convenience of the
+    toolbox user.
 
     Parameters
     ----------
@@ -153,7 +188,7 @@
         DataCollection object containing information about the specified
         run. Data can be loaded using built-in class methods.
     """
-    return open_run(proposal, run, **kwargs)
+    return ed.open_run(proposal, run, **kwargs)
 
 
 def run_by_path(path):
@@ -174,7 +209,7 @@
         DataCollection object containing information about the specified
         run. Data can be loaded using built-in class methods.
     """
-    return RunDirectory(path)
+    return ed.RunDirectory(path)
 
 
 def concatenateRuns(runs):
@@ -228,7 +263,7 @@
     Example
     -------
     >>> import toolbox_scs as tb
-    >>> run = tb.run_by_proposal(2212, 235)
+    >>> run = tb.load_run(2212, 235)
     >>> mnemonic = 'PP800_PhaseShifter'
     >>> scan_variable = tb.load_scan_variable(
             self.ed_run, mnemonic, 0.5)
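The ``Example`` section of ``load_scan_variable`` still shows ``self.ed_run`` from the test class; outside a test the call reads as in this sketch (mnemonic and 0.5 binning step taken from that docstring):

.. code:: python

    import toolbox_scs as tb

    run = tb.load_run(2212, 235)
    # bin the phase shifter position into steps of 0.5
    scan_variable = tb.load_scan_variable(run, 'PP800_PhaseShifter', 0.5)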
""" - return RunDirectory(path) + return ed.RunDirectory(path) def concatenateRuns(runs): @@ -228,7 +263,7 @@ def load_scan_variable(run, mnemonic, stepsize=None): Example ------- >>> import toolbox_scs as tb - >>> run = tb.run_by_proposal(2212, 235) + >>> run = tb.load_run(2212, 235) >>> mnemonic = 'PP800_PhaseShifter' >>> scan_variable = tb.load_scan_variable( self.ed_run, mnemonic, 0.5) diff --git a/src/toolbox_scs/test/test_detectors_common.py b/src/toolbox_scs/test/test_detectors_common.py index 4a139505d75957078c98a822b6f3ba5ca7db42e7..7e54d491670ccda2879c97017cebda0e71734afd 100644 --- a/src/toolbox_scs/test/test_detectors_common.py +++ b/src/toolbox_scs/test/test_detectors_common.py @@ -34,7 +34,7 @@ class TestDetectors(unittest.TestCase): @classmethod def setUpClass(cls): log_root.info("Start global setup.") - cls._run = tb.run_by_proposal(2212, 235) + cls._run = tb.load_run(2212, 235) fields = ["sase1", "sase3", "npulses_sase3", "npulses_sase1", "MCP2apd", "SCS_SA3", "nrj"] diff --git a/src/toolbox_scs/test/test_detectors_dssc.py b/src/toolbox_scs/test/test_detectors_dssc.py index bd4a38df96aed6e128aeffb7f42d3cc14f8901e6..eb54fac6809ff503fd940fe1428955e762befa8d 100644 --- a/src/toolbox_scs/test/test_detectors_dssc.py +++ b/src/toolbox_scs/test/test_detectors_dssc.py @@ -43,6 +43,7 @@ suites = {"no-processing": ( "test_prepareempty", "test_loadchunkdata", "test_splitframes", + "test_mergechunks", ), "full": ( "test_info", @@ -51,6 +52,7 @@ suites = {"no-processing": ( "test_prepareempty", "test_loadchunkdata", "test_splitframes", + "test_mergechunks", "test_processmodule", ) } @@ -77,7 +79,7 @@ class TestDSSC(unittest.TestCase): cls._scanfile = './tmp/scan.h5' cls._maskfile = './tmp/mask.h5' - cls._run = tb.run_by_proposal(proposal, run_nr, include='*DA*') + cls._run = tb.load_run(proposal, run_nr, include='*DA*') cls._scan_variable = tb.load_scan_variable(cls._run, scan_variable, stepsize) cls._scan_variable.to_netcdf(cls._scanfile, group='data', mode='w', @@ -154,6 +156,15 @@ class TestDSSC(unittest.TestCase): ed.by_index[start_index:start_index + chunksize]) data = load_chunk_data(sel, sourcename) self.assertIsNotNone(data) + log_root.debug(f"Loaded {ntrains} trains for {sourcename}") + + + def test_splitframes(self): + pass + + + def test_mergechunks(self): + pass def test_processmodule(self): @@ -182,8 +193,6 @@ class TestDSSC(unittest.TestCase): print('finished processing modules:', strftime('%X')) - def test_splitframes(self): - pass def list_suites(): print("""\nPossible test suites:\n-------------------------""") diff --git a/src/toolbox_scs/test/test_top_level.py b/src/toolbox_scs/test/test_top_level.py index 74f7829eb061f2e2921202826d649c971928ece7..fc133af6c8701e1ca57d1ba5f2689936f8dacd78 100644 --- a/src/toolbox_scs/test/test_top_level.py +++ b/src/toolbox_scs/test/test_top_level.py @@ -20,7 +20,7 @@ suites = {"packaging": ( "load": ( "test_load", "test_openrun", - #"test_openrunpath", + "test_openrunpath", "test_loadscanvariable1", "test_loadscanvariable2", ) @@ -65,7 +65,7 @@ class TestToolbox(unittest.TestCase): self.assertEqual(run_tb['npulses_sase3'].values[0], 42) def test_openrun(self): - run = tb.run_by_proposal(2212, 235) + run = tb.load_run(2212, 235) src = 'SCS_DET_DSSC1M-1/DET/0CH0:xtdf' self.assertTrue(src in run.all_sources) diff --git a/src/toolbox_scs/util/data_access.py b/src/toolbox_scs/util/data_access.py index 66ff996428faa0ef5d5d0be5860c057147dfea45..948f650e3479130c986b7e001dd3f55d46df7ed9 100644 --- a/src/toolbox_scs/util/data_access.py +++ 
diff --git a/src/toolbox_scs/util/data_access.py b/src/toolbox_scs/util/data_access.py
index 66ff996428faa0ef5d5d0be5860c057147dfea45..948f650e3479130c986b7e001dd3f55d46df7ed9 100644
--- a/src/toolbox_scs/util/data_access.py
+++ b/src/toolbox_scs/util/data_access.py
@@ -39,6 +39,11 @@ def find_run_dir(proposal, run):
         Error raised if the constructed path does not exist. This may happen
         when entering a non-valid run number, or the folder has been
         renamed/removed.
+
+    Notes
+    -----
+    The rather unspecific Exception raised when entering an invalid proposal
+    number stems from the extra_data package; to be fixed externally.
     """
     rdir = None
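Until the unspecific exception noted above is narrowed upstream, callers of ``find_run_dir`` have to guard broadly, as in this sketch:

.. code:: python

    from toolbox_scs.util.data_access import find_run_dir

    try:
        rdir = find_run_dir(2212, 235)
    except Exception as err:
        # an invalid proposal number currently surfaces an unspecific
        # error from extra_data; an invalid run number raises the error
        # documented in the docstring
        print('could not resolve run directory:', err)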