From 184bee16cd70e94607f93b0644b3c567ada54b51 Mon Sep 17 00:00:00 2001
From: Laurent Mercadier <laurent.mercadier@xfel.eu>
Date: Mon, 19 Jun 2023 12:13:49 +0200
Subject: [PATCH] Adds function to check data rate (missing trains)

---
 src/toolbox_scs/load.py | 47 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/src/toolbox_scs/load.py b/src/toolbox_scs/load.py
index 4edbcc3..a28f1c4 100644
--- a/src/toolbox_scs/load.py
+++ b/src/toolbox_scs/load.py
@@ -31,6 +31,7 @@ __all__ = [
     'open_run',
     'run_by_path',
     'load_run_values',
+    'check_data_rate',
 ]
 
 log = logging.getLogger(__name__)
@@ -209,6 +210,7 @@ def load(proposalNB=None, runNB=None,
                           'Skipping!')
                     continue
                 data_arrays.append(arr)
+    # Check missing trains
     for arr in data_arrays:
         rate = arr.sizes["trainId"] / len(run.train_ids)
         if rate < 0.95:
@@ -494,3 +496,48 @@ def concatenateRuns(runs):
     for k in orderedRuns[0].attrs.keys():
         result.attrs[k] = [run.attrs[k] for run in orderedRuns]
     return result
+
+
+def check_data_rate(run, mnemonics=None):
+    """
+    Calculates the fraction of train ids that contain data in a run.
+
+    Parameters
+    ----------
+    run: extra_data DataCollection
+        the DataCollection associated to the data.
+    mnemonics: str, list of str or dict
+        mnemonics to check. If None, all mnemonics in the run are checked.
+        A custom mnemonic can be defined with a dictionary: {'extra':
+        {'source': 'SCS_CDIFFT_MAG/SUPPLY/CURRENT', 'key':
+        'actual_current.value'}}. Dictionaries may be passed directly or
+        inside a list, and may define several mnemonics at once.
+
+    Returns
+    -------
+    ret: dictionary
+        dictionary with mnemonics as keys and fraction of train ids
+        that contain data as values (0. if the run has no train).
+        Unknown mnemonics are skipped with a warning.
+    """
+    run_mnemonics = mnemonics_for_run(run)
+    if mnemonics is None:
+        mnemonics = run_mnemonics
+    if isinstance(mnemonics, (str, dict)):
+        mnemonics = [mnemonics]
+    ntrains = len(run.train_ids)
+    ret = {}
+    for m in mnemonics:
+        if isinstance(m, dict):
+            definitions = m
+        elif m in run_mnemonics:
+            definitions = {m: run_mnemonics[m]}
+        else:
+            log.warning(f'mnemonic {m} not found. Skipping!')
+            continue
+        for name, val in definitions.items():
+            counts = run[val['source']][val['key']].data_counts(False)
+            # count trains with >= 1 entry, not entries (pulses) per train
+            nfilled = (counts > 0).sum()
+            ret[name] = nfilled / ntrains if ntrains else 0.
+    return ret
-- 
GitLab