diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb index f9f57a7943b5e8ffcf7ee5ebb3b94c5ff240c27b..8f5bc62e55b21bd0cd552d14c8b1c55339ea6a49 100644 --- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb @@ -108,7 +108,8 @@ "source": [ "in_folder = Path(in_folder)\n", "out_folder = Path(out_folder)\n", - "run_dc = RunDirectory(in_folder / f'r{run:04d}')\n", + "run_folder = in_folder / f'r{run:04d}'\n", + "run_dc = RunDirectory(run_folder)\n", "instrument_src = instrument_source_template.format(karabo_id, receiver_template)\n", "\n", "if out_folder.exists() and not overwrite:\n", @@ -137,7 +138,7 @@ "source": [ "# Read available sequence files to correct.\n", "mapped_files, num_seq_files = map_seq_files(\n", - " run_dc, karabo_id, karabo_da, sequences)\n", + " run_folder, karabo_da, sequences)\n", "\n", "if not len(mapped_files):\n", " raise IndexError(\n", diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py index 88a974c27fe90dc8884b1138e7cbb0ca7f70c9a2..db913f26fe57182ec9d89dcc3a142c301110faff 100644 --- a/src/cal_tools/tools.py +++ b/src/cal_tools/tools.py @@ -19,7 +19,7 @@ import numpy as np import requests import yaml import zmq -from extra_data import RunDirectory +from extra_data import H5File, RunDirectory from iCalibrationDB import ConstantMetaData, Versions from metadata_client.metadata_client import MetadataClient from notebook.notebookapp import list_running_servers @@ -59,14 +59,13 @@ def run_prop_seq_from_path(filename): def map_seq_files( - run_dc: "extra_data.DataCollection", - karabo_id: str, + run_folder: Path, karabo_das: List[str], sequences: Optional[List[int]] = None, ) -> Tuple[dict, int]: - """Using a DataCollection from extra-data to read - available sequence files. + """Glob run_folder and match the files based on the selected + detectors and sequence numbers. Returns: Dict: with karabo_das keys and the corresponding sequence files. @@ -82,8 +81,8 @@ def map_seq_files( mapped_files = {kda: [] for kda in karabo_das} total_files = 0 - for fn in run_dc.select(f"*{karabo_id}*").files: - fn = Path(fn.filename) + + for fn in run_folder.glob("*.h5"): if (match := seq_fn_pat.match(fn.name)) is not None: da = match.group("da") if da in mapped_files and ( @@ -92,6 +91,10 @@ def map_seq_files( mapped_files[da].append(fn) total_files += 1 + # Return dict with sorted list of sequence files. + for k in mapped_files: + mapped_files[k].sort() + return mapped_files, total_files @@ -293,7 +296,7 @@ def creation_date_metadata_client( def creation_date_file_metadata( - dc: RunDirectory + run_folder: Path, ) -> Optional[datetime.datetime]: """Get run directory creation date from METADATA/CreationDate of the oldest file using EXtra-data. @@ -302,15 +305,15 @@ def creation_date_file_metadata( :param dc: EXtra-data DataCollection for the run directory. :return Optional[datetime.datetime]: Run creation date. """ + md_dict = RunDirectory(run_folder).run_metadata() - md_dict = dc.run_metadata() if md_dict["dataFormatVersion"] != "0.5": - oldest_file = sorted( - dc.files, key=lambda x: x.metadata()["creationDate"])[0] + creation_dates = [ + H5File(f).run_metadata()["creationDate"] + for f in run_folder.glob("*.h5") + ] return datetime.datetime.strptime( - oldest_file.metadata()["creationDate"], - "%Y%m%dT%H%M%S%z", - ) + min(creation_dates), "%Y%m%dT%H%M%S%z") else: print("WARNING: input files contains old datasets. " "No `METADATA/creationDate` to read.") @@ -385,8 +388,7 @@ def get_dir_creation_date(directory: Union[str, Path], run: int, "for the oldest input file.") cdate = datetime.datetime.fromtimestamp( sorted( - [Path(f.filename) for f in dc.files], - key=path.getmtime + directory.glob("*.h5"), key=path.getmtime, )[0].stat().st_mtime, tz=datetime.timezone.utc, ) diff --git a/tests/test_cal_tools.py b/tests/test_cal_tools.py index 6e7f44b279dbf59788acf1e1f73fd0e21e1cd7e9..e2c494f3ed0bf328517d1dc1a2c06a665e6247c7 100644 --- a/tests/test_cal_tools.py +++ b/tests/test_cal_tools.py @@ -17,6 +17,7 @@ from cal_tools.tools import ( get_dir_creation_date, get_from_db, get_pdu_from_db, + map_seq_files, module_index_to_qm, send_to_db, ) @@ -37,6 +38,7 @@ WRONG_CAL_DB_INTERFACE = "tcp://max-exfl017:0000" PROPOSAL = 900113 + @pytest.fixture def _agipd_const_cond(): # AGIPD dark offset metadata @@ -60,6 +62,30 @@ def test_show_processed_modules(): assert 'LDP' in err.value() +@pytest.mark.requires_gpfs +@pytest.mark.parametrize( + "karabo_da,sequences,expected_len", + [ + ("AGIPD00", [-1], 3), + ("AGIPD00", [0, 1], 2), + ("EPIX01", [-1], 0), + ("AGIPD00", [117], 0), + ], +) +def test_map_seq_files(karabo_da, sequences, expected_len): + run_folder = Path('/gpfs/exfel/exp/CALLAB/202031/p900113/raw/r9983') + expected_dict = {karabo_da: []} + + if expected_len: + sequences = range(expected_len) if sequences == [-1] else sequences + expected_dict = { + karabo_da: [run_folder / f"RAW-R9983-AGIPD00-S0000{s}.h5" for s in sequences] # noqa + } + + assert map_seq_files(run_folder, [karabo_da], sequences) == ( + expected_dict, expected_len) + + @pytest.mark.requires_gpfs def test_dir_creation_date(): """This test is based on not connecting to MDC and failing to use @@ -97,12 +123,14 @@ def test_creation_date_metadata_client(): @pytest.mark.requires_gpfs def test_creation_date_file_metadata(): - date = creation_date_file_metadata(open_run(PROPOSAL, 9983)) + date = creation_date_file_metadata( + Path('/gpfs/exfel/exp/CALLAB/202031/p900113/raw/r9983')) assert isinstance(date, datetime) assert str(date) == '2020-09-23 13:30:50+00:00' # Old run without METADATA/CreationDate - date = creation_date_file_metadata(open_run(PROPOSAL, 9999)) + date = creation_date_file_metadata( + Path('/gpfs/exfel/exp/CALLAB/202031/p900113/raw/r9999')) assert date is None