From d20c2f457293aa967468741b0d1f93751f23c4da Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Wed, 21 Feb 2024 16:22:31 +0100
Subject: [PATCH] add new function to extract sequence number from data filename

---
 src/cal_tools/tools.py  | 17 +++++++++++++++++
 tests/test_cal_tools.py | 12 ++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py
index d9e5e5f9e..c72a729bb 100644
--- a/src/cal_tools/tools.py
+++ b/src/cal_tools/tools.py
@@ -1065,3 +1065,20 @@ def raw_data_location_string(proposal: str, runs: List[int]):
             " a preceding 'p'. Example: 'p900203'")
 
     return f"proposal:{proposal} runs:{' '.join(map(str, runs))}"
+
+
+def extract_sequence_number(f: str) -> int:
+    """Extract the sequence number from the last component of a file path.
+    Args:
+        f (str):  EXDF data filename; `-S<digits>` in parent folders is ignored.
+            e.g. /gpfs/exfel/exp/CALLAB/202130/p900203/raw/r9046/RAW-R9046-EPIX01-S00000.h5
+    Raises:
+        ValueError: no `-S<digits>` part in the last path component.
+    Returns:
+        int: sequence number (0 for the example above).
+    """
+    # `[^/]*$` pins the match to the text after the final '/'.
+    match = re.search(r'-S(\d+)[^/]*$', f)
+    if match:
+        return int(match.group(1))
+    raise ValueError(f"No matching pattern for a sequence in the filename {f}.")
\ No newline at end of file
diff --git a/tests/test_cal_tools.py b/tests/test_cal_tools.py
index 577a82a4f..795d9ce97 100644
--- a/tests/test_cal_tools.py
+++ b/tests/test_cal_tools.py
@@ -13,6 +13,7 @@ from cal_tools.plotting import show_processed_modules
 from cal_tools.tools import (
     creation_date_file_metadata,
     creation_date_train_timestamp,
+    extract_sequence_number,
     get_dir_creation_date,
     get_from_db,
     get_pdu_from_db,
@@ -581,3 +582,14 @@ def test_raise_raw_data_location_string():
 
     with pytest.raises(ValueError):
         raw_data_location_string("900203", [9008, 9009, 9010])
+
+
+def test_extract_sequence_number():
+    """The digits after `-S` in a raw file path are returned as a number."""
+    raw_file = "/gpfs/exfel/exp/CALLAB/202031/p900113/raw/r9983/RAW-R9983-AGIPD00-S00012.h5"
+    assert extract_sequence_number(raw_file) == 12
+
+
+def test_raise_extract_sequence_number():
+    with pytest.raises(ValueError):  # run folder path: it has no -S<digits> part
+        extract_sequence_number("/gpfs/exfel/exp/CALLAB/202031/p900113/raw/r9983")
-- 
GitLab