From 184bee16cd70e94607f93b0644b3c567ada54b51 Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Mon, 19 Jun 2023 12:13:49 +0200
Subject: [PATCH] Adds function to check data rate (missing trains)

---
 src/toolbox_scs/load.py | 53 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/toolbox_scs/load.py b/src/toolbox_scs/load.py
index 4edbcc3..a28f1c4 100644
--- a/src/toolbox_scs/load.py
+++ b/src/toolbox_scs/load.py
@@ -31,6 +31,7 @@ __all__ = [
     'open_run',
     'run_by_path',
     'load_run_values',
+    'check_data_rate',
 ]
 
 log = logging.getLogger(__name__)
@@ -209,6 +210,7 @@ def load(proposalNB=None, runNB=None,
                             'Skipping!')
                 continue
             data_arrays.append(arr)
+
     # Check missing trains
     for arr in data_arrays:
         rate = arr.sizes["trainId"] / len(run.train_ids)
@@ -494,3 +496,51 @@ def concatenateRuns(runs):
     for k in orderedRuns[0].attrs.keys():
         result.attrs[k] = [run.attrs[k] for run in orderedRuns]
     return result
+
+
+def check_data_rate(run, mnemonics=None):
+    """
+    Calculates the fraction of train ids that contain data in a run.
+
+    Parameters
+    ----------
+    run: extra_data DataCollection
+        the DataCollection associated to the data.
+    mnemonics: str, dict, or list of str and/or dict
+        mnemonics to check. If None, all mnemonics in the run are checked.
+        A custom mnemonic can be defined with a dictionary: {'extra':
+        {'source': 'SCS_CDIFFT_MAG/SUPPLY/CURRENT', 'key':
+        'actual_current.value'}}
+
+    Returns
+    -------
+    ret: dictionary
+        dictionary with mnemonic as keys and fraction of train ids
+        that contain data as values. Mnemonics that are neither custom
+        nor available in the run are skipped with a warning.
+    """
+    run_mnemonics = mnemonics_for_run(run)
+    if mnemonics is None:
+        mnemonics = list(run_mnemonics)
+    elif isinstance(mnemonics, (str, dict)):
+        # A bare dict is a custom mnemonic definition: iterating over it
+        # would only yield its names and drop the source/key description.
+        mnemonics = [mnemonics]
+    # Resolve every request to a {name: {'source': ..., 'key': ...}} entry.
+    selected = {}
+    for m in mnemonics:
+        if isinstance(m, dict):
+            selected.update(m)
+        elif m in run_mnemonics:
+            selected[m] = run_mnemonics[m]
+        else:
+            log.warning(f'mnemonic {m} not found. Skipping!')
+    nb_trains = len(run.train_ids)
+    ret = {}
+    for name, val in selected.items():
+        counts = run[val['source']][val['key']].data_counts(False)
+        npulses = counts.max() if counts.size else 0
+        if npulses == 0:  # (only missing data, or run without trains)
+            rate = 0.
+        else:
+            counts = counts / npulses  # to only count trains and not pulses
+            rate = counts.sum() / nb_trains
+        ret[name] = rate
+    return ret
-- 
GitLab