Commit 33896191 authored by Rafael Gort, committed by Martin Teichmann
Added snippets for the docs. Cleaned up unnecessary in-package use of the ed wrapper. Added logging to the dssc module.

parent 5fa6f7de
@@ -11,4 +11,27 @@ be selected on the online cluster by:
`module load exfel exfel_anaconda3`
before launching the jupyter-notebook or on max-jhub by selecting the 'xfel'
kernel instead of the 'Python 3' anaconda environment maintained by DESY.
\ No newline at end of file
kernel instead of the 'Python 3' anaconda environment maintained by DESY.
Installation
############

As long as the ToolBox is not yet part of the exfel_anaconda3 environment, it needs to be installed locally. Activate the environment mentioned above and check whether the scs_toolbox is already installed:

.. code:: bash

    pip show toolbox_scs

If the toolbox has been installed in your home directory previously, everything is set up. Otherwise it needs to be installed (only once). In that case, enter the following command in the directory where the *setup.py* script is located:

.. code:: bash

    pip install --user .

If you intend to develop code in the toolbox, use the -e flag for installation. This installs the package in editable mode, creating a symbolic link to the source code you are working on:

.. code:: bash

    pip install --user -e .
\ No newline at end of file
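Once installed, a quick import check confirms that Python resolves the local package (a minimal sketch; the printed path is illustrative):

.. code:: python

    import toolbox_scs as tb
    # should point at your local (--user or editable) installation
    print(tb.__file__)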
``Getting started``
~~~~~~~~~~~~~~~~~~~
``How to``
~~~~~~~~~~
``Contribute``
~~~~~~~~~~~~~~
The SCS Toolbox
===============
.. toctree::
:maxdepth: 2
doc/index.rst
Module index
============
*to be done*
from .load import (load, concatenateRuns, load_scan_variable,
run_by_proposal, run_by_path)
load_run, run_by_path)
from .constants import mnemonics
@@ -8,7 +8,7 @@ __all__ = (
"load",
"concatenateRuns",
"load_scan_variable",
"run_by_proposal",
"load_run",
"run_by_path",
# Classes
# Variables
......
@@ -21,7 +21,6 @@ import xarray as xr
import pandas as pd
import extra_data as ed
from ..load import run_by_proposal as _open_run
log = logging.getLogger(__name__)
@@ -45,7 +44,7 @@ def load_dssc_info(proposal, run_nr):
{'dims': tuple, 'frames_per_train': int, 'total_frames': int}
"""
module = _open_run(proposal, run_nr, include='*DSSC00*')
module = ed.open_run(proposal, run_nr, include='*DSSC00*')
info = module.detector_info('SCS_DET_DSSC1M-1/DET/0CH0:xtdf')
log.debug("Fetched information for DSSC module nr. 0.")
return info
@@ -81,6 +80,7 @@ def calc_xgm_frame_indices(nbunches, framepattern):
if 'dark' not in p:
frame_indices.append(np.arange(i, frame_max, n_frames))
log.debug("Constructed coordinate array for XGM data.")
return np.sort(np.concatenate(frame_indices))
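For reference, a self-contained sketch of the index construction in this hunk; the definitions of n_frames and frame_max are assumed from context, as they are not visible in the diff:

.. code:: python

    import numpy as np

    def xgm_frame_indices_sketch(nbunches, framepattern):
        # assumption: DSSC frames repeat in framepattern order,
        # nbunches times; dark frames carry no XGM value
        n_frames = len(framepattern)
        frame_max = nbunches * n_frames
        frame_indices = []
        for i, p in enumerate(framepattern):
            if 'dark' not in p:
                frame_indices.append(np.arange(i, frame_max, n_frames))
        return np.sort(np.concatenate(frame_indices))

    print(xgm_frame_indices_sketch(3, ['pumped', 'dark', 'unpumped', 'dark']))
    # -> [ 0  2  4  6  8 10]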
@@ -118,14 +118,27 @@ def prepare_module_empty(scan_variable, framepattern):
module_data[name] = empty.copy()
module_data['sum_count_' + name] = empty_sum_count.copy()
log.debug("Prepared empty data array for single dssc module")
return module_data
def load_chunk_data(sel, sourcename, maxframes=None):
'''Load DSSC data (sel is a DataCollection or a subset of a DataCollection
obtained by its select_trains() method). The flattened multi-index
(trains+pulses) is unraveled before returning the data.
'''
"""
Load selected DSSC data. The flattened multi-index (trains+pulses) is
unraveled before returning the data.
Parameters
----------
sel: extra_data.DataCollection
a DataCollection or a subset of a DataCollection obtained by its
select_trains() method
sourcename: str
    detector source name, e.g. 'SCS_DET_DSSC1M-1/DET/0CH0:xtdf'
Returns
-------
xarray.DataArray
"""
info = sel.detector_info(sourcename)
fpt = info['frames_per_train']
frames_total = info['total_frames']
@@ -142,21 +155,42 @@ def load_chunk_data(sel, sourcename, maxframes=None):
dict(trainId_pulse=midx)
).unstack('trainId_pulse')
data = data.transpose('trainId', 'pulse', 'x', 'y')
return data.loc[{'pulse': np.s_[:maxframes]}]
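The multi-index unstacking used here can be reproduced on synthetic data. A sketch; note that recent xarray versions may require xr.Coordinates.from_pandas_multiindex instead of assigning the MultiIndex directly:

.. code:: python

    import numpy as np
    import pandas as pd
    import xarray as xr

    ntrains, fpt = 4, 3
    # flattened (train, pulse) axis, as delivered by the detector reader
    flat = np.arange(ntrains * fpt * 4.0).reshape(ntrains * fpt, 2, 2)
    midx = pd.MultiIndex.from_product(
        [np.arange(ntrains), np.arange(fpt)], names=['trainId', 'pulse'])
    data = xr.DataArray(flat, dims=['trainId_pulse', 'x', 'y'])
    data = data.assign_coords(trainId_pulse=midx).unstack('trainId_pulse')
    data = data.transpose('trainId', 'pulse', 'x', 'y')
    print(data.shape)  # (4, 3, 2, 2)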
def merge_chunk_data(module_data, chunk_data, framepattern):
'''Merge chunk data with prepared dataset for entire module.
"""
Merge chunk data with prepared dataset for entire module.
Aligns on "scan_variable" and sums values for variables
['pumped', 'unpumped', 'sum_count']
Concatenates the data along a new dimension ('tmp') and uses
the sum() method for automatic dtype conversion'''
the sum() method for automatic dtype conversion
Parameters
----------
module_data: xarray.Dataset
module data array to be filled
chunk_data: xarray.Dataset
loaded chunk of data to be merged into module_data
framepattern: list of strings
example: ['pumped', 'unpumped']
Returns
-------
module_data: xarray.Dataset
merged module data
"""
where = dict(scan_variable=chunk_data.scan_variable)
for name in framepattern:
for prefix in ['', 'sum_count_']:
var = prefix + name
summed = xr.concat([module_data[var].loc[where], chunk_data[var]], dim='tmp').sum('tmp')
summed = xr.concat([module_data[var].loc[where], chunk_data[var]],
dim='tmp').sum('tmp')
module_data[var].loc[where] = summed
log.debug("Merged chunked data")
return module_data
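The concat-then-sum idiom above leans on xarray's automatic dtype promotion; a toy illustration (not part of the commit):

.. code:: python

    import numpy as np
    import xarray as xr

    a = xr.DataArray(np.zeros(3, dtype=np.uint16), dims=['x'])
    b = xr.DataArray(np.full(3, 70000, dtype=np.int64), dims=['x'])
    # summing along a temporary concat dimension promotes the dtype,
    # so the uint16 accumulator cannot silently overflow
    summed = xr.concat([a, b], dim='tmp').sum('tmp')
    print(summed.dtype)  # int64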
@@ -164,13 +198,25 @@ def split_frames(data, pattern, prefix=''):
"""
Split frames according to "pattern" (possibly repeating) and average over
resulting splits.
"pattern" is a list of frame names (order matters!). Examples:
pattern = ['pumped', 'pumped_dark', 'unpumped', 'unpumped_dark'] # 4
DSSC frames, 2 FEL pulses
pattern = ['pumped', 'unpumped'] # 2 FEL frames, no intermediate darks
pattern = ['image'] # no splitting, average over all frames
Returns a dataset with data variables named prefix + framename
Parameters
----------
data: xarray.DataArray
    the data to split along the 'pulse' dimension
pattern: list of str
    A list of frame names (order matters!). Examples:
# 4 DSSC frames, 2 FEL pulses
pattern = ['pumped', 'pumped_dark', 'unpumped', 'unpumped_dark']
# 2 FEL frames, no intermediate darks
pattern = ['pumped', 'unpumped']
# no splitting, average over all frames
pattern = ['image']
Returns
-------
dataset: xarray.Dataset
    a dataset with data variables named prefix + framename
"""
n = len(pattern)
dataset = xr.Dataset()
for i, name in enumerate(pattern):
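The loop body is truncated in this hunk; a minimal sketch consistent with the docstring (stride slicing along the pulse axis, averaging each split):

.. code:: python

    import numpy as np
    import xarray as xr

    pattern = ['pumped', 'unpumped']
    n = len(pattern)
    data = xr.DataArray(np.arange(8.0), dims=['pulse'])
    dataset = xr.Dataset()
    for i, name in enumerate(pattern):
        # every n-th frame, starting at offset i, averaged over the split
        dataset[name] = data[i::n].mean('pulse')
    print(dataset['pumped'].values, dataset['unpumped'].values)  # 3.0 4.0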
@@ -203,16 +249,19 @@ def process_intra_train(job):
dims = ['pulse', 'x', 'y']
coords = {'pulse': np.arange(fpt, dtype=int)}
shape = [fpt, 128, 512]
module_data = xr.DataArray(np.zeros(shape, dtype=float), dims=dims, coords=coords)
module_data = xr.DataArray(np.zeros(shape, dtype=float), dims=dims,
coords=coords)
module_data = module_data.to_dataset(name='image')
module_data['sum_count'] = xr.DataArray(np.zeros(fpt, dtype=int), dims=['pulse'])
module_data['sum_count'] = xr.DataArray(np.zeros(fpt, dtype=int),
dims=['pulse'])
ntrains = len(collection.train_ids)
chunks = np.arange(ntrains, step=chunksize)
if module == 15:
pbar = tqdm(total=len(chunks))
for start_index in chunks:
sel = collection.select_trains(kd.by_index[start_index:start_index + chunksize])
sel = collection.select_trains(
kd.by_index[start_index:start_index + chunksize])
data = load_chunk_data(sel, sourcename, maxframes)
data = data.to_dataset(name='image')
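The chunked selection pattern above generalizes to any DataCollection; a hedged sketch using the extra_data calls that appear in this commit (proposal and run numbers are placeholders):

.. code:: python

    import numpy as np
    import extra_data as ed

    # placeholders: adapt proposal/run to your data
    run = ed.open_run(2212, 235, include='*DSSC00*')
    chunksize = 512
    for start in np.arange(len(run.train_ids), step=chunksize):
        sel = run.select_trains(ed.by_index[start:start + chunksize])
        # ... process the chunk, e.g. with load_chunk_data(sel, sourcename)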
@@ -220,8 +269,10 @@
data = data.sum('trainId')
for var in ['image', 'sum_count']:
# concatenating and using the sum() method automatically takes care of dtype casting if necessary
module_data[var] = xr.concat([module_data[var], data[var]], dim='tmp').sum('tmp')
# concatenating and using the sum() method automatically takes care
# of dtype casting if necessary
module_data[var] = xr.concat([module_data[var], data[var]],
dim='tmp').sum('tmp')
if module == 15:
pbar.update(1)
@@ -266,6 +317,7 @@ def process_dssc_module(job):
"""
log.info(f"processing dssc module {module}: start")
proposal = job['proposal']
run_nr = job['run_nr']
module = job['module']
@@ -300,6 +352,8 @@
ed.by_index[start_index:start_index + chunksize])
nframes = sel.detector_info(sourcename)['total_frames']
if nframes > 0: # some chunks have no DSSC data at all
log.debug(f"Module {module}: "
f"load trains {start_index}:{start_index + chunksize}")
data = load_chunk_data(sel, sourcename)
sum_count = xr.full_like(data[..., 0, 0], fill_value=1)
if pulsemask is not None:
@@ -314,10 +368,16 @@
# aligns on trainId, drops non-matching trains
data['scan_variable'] = scan
data = data.groupby('scan_variable').sum('trainId')
log.debug(f"Module {module}: "
f"merge trains {start_index}:{start_index + chunksize}")
module_data = merge_chunk_data(module_data, data, framepattern)
if module == 15:
pbar.update(1)
for name in framepattern:
module_data[name] = module_data[name] / module_data['sum_count_' + name]
log.info(f"processing module {module}: done")
return module_data
\ No newline at end of file
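The final division turns accumulated sums into means; a toy version of this accumulate-then-normalize pattern (not from the commit):

.. code:: python

    import numpy as np
    import xarray as xr

    total = xr.DataArray(np.zeros(4), dims=['pulse'])
    count = xr.DataArray(np.zeros(4), dims=['pulse'])
    for chunk in (np.ones(4), 3 * np.ones(4)):
        total = total + chunk  # accumulate sums per chunk
        count = count + 1      # track how many chunks contributed
    mean = total / count       # normalize once at the end
    print(mean.values)  # [2. 2. 2. 2.]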
# -*- coding: utf-8 -*-
""" Toolbox for SCS.
"""
Toolbox for SCS.
Various utility functions to quickly process data measured at the SCS
instruments.
Copyright (2019) SCS Team.
"""
import os
import logging
import numpy as np
import xarray as xr
from extra_data import by_index, RunDirectory, open_run
import extra_data as ed
from extra_data.read_machinery import find_proposal
from .misc.bunch_pattern import extractBunchPattern
@@ -21,32 +24,60 @@ from .util.exceptions import *
log = logging.getLogger(__name__)
def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=False,
subset=by_index[:], rois={}, useBPTable=True):
""" Load a run and extract the data. Output is an xarray with aligned trainIds
Inputs:
fields: list of mnemonic strings to load specific data such as "fastccd", "SCS_XGM",
or dictionaries defining a custom mnemonic such as
{"extra": {'SCS_CDIFFT_MAG/SUPPLY/CURRENT', 'actual_current.value', None}}
runNB: (str, int) run number as integer
proposalNB: (str, int) of the proposal number e.g. 'p002252' or 2252
subFolder: (str) sub-folder from which to load the data. Use 'raw' for raw
data or 'proc' for processed data.
display: (bool) whether to show the run.info or not
validate: (bool) whether to run extra-data-validate or not
subset: a subset of trains that can be loaded with by_index[:5] for the
first 5 trains
rois: a dictionary of mnemonics with a list of ROI definitions and the desired
names, for example {'fastccd':{'ref':{'roi':by_index[730:890, 535:720],
'dim': ['ref_x', 'ref_y']}, 'sam':{'roi':by_index[1050:1210, 535:720],
'dim': ['sam_x', 'sam_y']}}}
useBPTable: If True, uses the raw bunch pattern table to extract sase pulse
number and indices in the trains. If False, loads the data from the BUNCH_DECODER
middle layer device.
Outputs:
res: an xarray DataSet with aligned trainIds
def load(fields, runNB, proposalNB,
subFolder='raw',
display=False,
validate=False,
subset=ed.by_index[:],
rois={},
useBPTable=True):
"""
Load a run and extract the data. Output is an xarray with aligned
trainIds
Parameters
----------
fields: list of str or dict
    list of mnemonic strings to load specific data such as "fastccd",
    "SCS_XGM", or dictionaries defining a custom mnemonic such as
{"extra":
{'SCS_CDIFFT_MAG/SUPPLY/CURRENT',
'actual_current.value', None}}
runNB: (str, int)
run number as integer
proposalNB: (str, int)
proposal number, e.g. 'p002252' or 2252
subFolder: (str)
sub-folder from which to load the data. Use 'raw' for raw data
or 'proc' for processed data.
display: (bool)
whether to show the run.info or not
validate: (bool)
whether to run extra-data-validate or not
subset:
    a subset of trains that can be loaded with by_index[:5] for the
    first 5 trains
rois: dict
    a dictionary of mnemonics with a list of ROI definitions and
the desired names, for example:
{'fastccd':
{'ref':
{'roi': by_index[730:890, 535:720],
'dim': ['ref_x', 'ref_y']},
'sam':
{'roi':by_index[1050:1210, 535:720],
'dim': ['sam_x', 'sam_y']}}}
useBPTable: bool
    If True, uses the raw bunch pattern table to extract the sase pulse
    number and indices in the trains. If False, loads the data from the BUNCH_DECODER
middle layer device.
Returns
-------
res: xarray.Dataset
    an xarray Dataset with aligned trainIds
"""
if isinstance(runNB, int):
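A hedged usage sketch for load(), using mnemonics that appear in this repository's tests (proposal and run numbers are placeholders):

.. code:: python

    import toolbox_scs as tb
    from extra_data import by_index

    # placeholders: adapt proposal/run to your beamtime
    run_data = tb.load(['SCS_XGM', 'nrj'], runNB=235, proposalNB=2212,
                       subset=by_index[:100])
    print(run_data)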
@@ -54,7 +85,7 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
if isinstance(proposalNB, int):
proposalNB = 'p{:06d}'.format(proposalNB)
runFolder = os.path.join(find_proposal(proposalNB), subFolder, runNB)
run = RunDirectory(runFolder).select_trains(subset)
run = ed.RunDirectory(runFolder).select_trains(subset)
if validate:
get_ipython().system('extra-data-validate ' + runFolder)
@@ -86,7 +117,8 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
if type(f) == dict:
# extracting mnemomic defined on the spot
if len(f.keys()) > 1:
print('Loading only one "on-the-spot" mnemonic at a time, skipping all others !')
print('Loading only one "on-the-spot" mnemonic at a time, '
'skipping all others !')
k = list(f.keys())[0]
v = f[k]
else:
@@ -110,12 +142,15 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
if k not in rois:
# no ROIs selection, we read everything
vals.append(run.get_array(v['source'], v['key'], extra_dims=v['dim']))
vals.append(run.get_array(v['source'], v['key'],
extra_dims=v['dim']))
keys.append(k)
else:
# ROIs selection, for each ROI we select a region of the data and save it with new name and dimensions
# ROIs selection, for each ROI we select a region of the data and
# save it with new name and dimensions
for nk,nv in rois[k].items():
vals.append(run.get_array(v['source'], v['key'], extra_dims=nv['dim'], roi=nv['roi']))
vals.append(run.get_array(v['source'], v['key'],
extra_dims=nv['dim'], roi=nv['roi']))
keys.append(nk)
aligned_vals = xr.align(*vals, join='inner')
@@ -126,12 +161,12 @@ def load(fields, runNB, proposalNB, subFolder='raw', display=False, validate=Fal
return result
def run_by_proposal(proposal, run, **kwargs):
def load_run(proposal, run, **kwargs):
"""
Get run in given proposal
Wraps the extra_data open_run routine, to ease its use for the
scs-toolbox user.
Wraps the extra_data open_run routine as a convenience for the toolbox
user.
Parameters
----------
@@ -153,7 +188,7 @@ def run_by_proposal(proposal, run, **kwargs):
DataCollection object containing information about the specified
run. Data can be loaded using built-in class methods.
"""
return open_run(proposal, run, **kwargs)
return ed.open_run(proposal, run, **kwargs)
def run_by_path(path):
@@ -174,7 +209,7 @@ def run_by_path(path):
DataCollection object containing information about the specified
run. Data can be loaded using built-in class methods.
"""
return RunDirectory(path)
return ed.RunDirectory(path)
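A usage sketch for run_by_path; the path below is a placeholder and should point at an actual run directory:

.. code:: python

    import toolbox_scs as tb

    # placeholder path to a raw run directory on the Maxwell cluster
    run = tb.run_by_path('/gpfs/exfel/exp/SCS/201901/p002212/raw/r0235')
    print(run.train_ids[:5])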
def concatenateRuns(runs):
@@ -228,7 +263,7 @@ def load_scan_variable(run, mnemonic, stepsize=None):
Example
-------
>>> import toolbox_scs as tb
>>> run = tb.run_by_proposal(2212, 235)
>>> run = tb.load_run(2212, 235)
>>> mnemonic = 'PP800_PhaseShifter'
>>> scan_variable = tb.load_scan_variable(
self.ed_run, mnemonic, 0.5)
......
@@ -34,7 +34,7 @@ class TestDetectors(unittest.TestCase):
@classmethod
def setUpClass(cls):
log_root.info("Start global setup.")
cls._run = tb.run_by_proposal(2212, 235)
cls._run = tb.load_run(2212, 235)
fields = ["sase1", "sase3", "npulses_sase3",
"npulses_sase1", "MCP2apd", "SCS_SA3", "nrj"]
......
@@ -43,6 +43,7 @@ suites = {"no-processing": (
"test_prepareempty",
"test_loadchunkdata",
"test_splitframes",
"test_mergechunks",
),
"full": (
"test_info",
@@ -51,6 +52,7 @@ suites = {"no-processing": (
"test_prepareempty",
"test_loadchunkdata",
"test_splitframes",
"test_mergechunks",
"test_processmodule",
)
}
@@ -77,7 +79,7 @@ class TestDSSC(unittest.TestCase):
cls._scanfile = './tmp/scan.h5'
cls._maskfile = './tmp/mask.h5'
cls._run = tb.run_by_proposal(proposal, run_nr, include='*DA*')
cls._run = tb.load_run(proposal, run_nr, include='*DA*')
cls._scan_variable = tb.load_scan_variable(cls._run,
scan_variable, stepsize)
cls._scan_variable.to_netcdf(cls._scanfile, group='data', mode='w',
@@ -154,6 +156,15 @@ class TestDSSC(unittest.TestCase):
ed.by_index[start_index:start_index + chunksize])
data = load_chunk_data(sel, sourcename)
self.assertIsNotNone(data)
log_root.debug(f"Loaded {ntrains} trains for {sourcename}")
def test_splitframes(self):
pass
def test_mergechunks(self):
pass
def test_processmodule(self):
@@ -182,8 +193,6 @@ class TestDSSC(unittest.TestCase):
print('finished processing modules:', strftime('%X'))
def test_splitframes(self):
pass
def list_suites():
print("""\nPossible test suites:\n-------------------------""")
......
@@ -20,7 +20,7 @@ suites = {"packaging": (
"load": (
"test_load",
"test_openrun",
#"test_openrunpath",
"test_openrunpath",
"test_loadscanvariable1",
"test_loadscanvariable2",
)
@@ -65,7 +65,7 @@ class TestToolbox(unittest.TestCase):
self.assertEqual(run_tb['npulses_sase3'].values[0], 42)
def test_openrun(self):
run = tb.run_by_proposal(2212, 235)
run = tb.load_run(2212, 235)
src = 'SCS_DET_DSSC1M-1/DET/0CH0:xtdf'
self.assertTrue(src in run.all_sources)
......
@@ -39,6 +39,11 @@ def find_run_dir(proposal, run):
Error raised if the constructed path does not exist. This may
happen when entering a non-valid run number, or the folder has
been renamed/removed.
Comment
-------
The rather unspecific Exception raised when entering an invalid proposal
number stems from the extra_data package; to be fixed externally.
"""
rdir = None
......