diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py index 2826ac04728452c81362f0aa0aef597efcd852d2..c837fa5da622334ea8bf450492efa11c8874ff50 100644 --- a/src/cal_tools/tools.py +++ b/src/cal_tools/tools.py @@ -12,8 +12,8 @@ from queue import Queue from time import sleep from typing import List, Optional, Tuple, Union from urllib.parse import urljoin +from extra_data import RunDirectory -import dateutil.parser import h5py import ipykernel import numpy as np @@ -267,77 +267,100 @@ def get_run_info(proposal, run): runs = mdc.get_proposal_runs(proposal_number=proposal, run_number=run) - run_id = runs['data']['runs'][0]['id'] + if len(runs['data']) != 0: + run_id = runs['data']['runs'][0]['id'] + return mdc.get_run_by_id_api(run_id).json() + else: # empty dictionary for wrong proposal or run. + raise KeyError(runs['app_info']) - resp = mdc.get_run_by_id_api(run_id) - return resp.json() - -def get_run_creation_date_mdc(proposal, run): - """ - Get run directory creation date from myMDC using metadata client. +def creation_date_metadata_client( + proposal: int, run: int) -> datetime.datetime: + """ Get run directory creation date from myMDC using metadata client. using method `get_proposal_runs`. - """ + """ + run_info = get_run_info(proposal, run) + return datetime.datetime.strptime( + run_info['begin_at'], "%Y-%m-%dT%H:%M:%S.%f%z", + ).astimezone(datetime.timezone.utc) + + +def creation_date_file_metadata( + dc: RunDirectory +) -> Optional[datetime.datetime]: + """ Get run directory creation date from + METADATA/CreationDate of the oldest file using EXtra-data. 
+ """ + + md_dict = dc.run_metadata() + if md_dict["dataFormatVersion"] != "0.5": + oldest_file = sorted( + dc.files, key=lambda x: x.metadata()["creationDate"])[0] return datetime.datetime.strptime( - run_info['begin_at'], "%Y:%m:%dT%H:%M:%SZ", - ).replace(tzinfo=datetime.timezone.utc) + oldest_file.metadata()["creationDate"], + "%Y%m%dT%H%M%SZ", + ).replace(tzinfo=datetime.timezone.utc) + else: + print("WARNING: input files contains old datasets. " + "No `METADATA/creationDate` to read.") -def get_run_metadata(proposal, run): - def get_dir_creation_date(directory: Union[str, Path], run: int, verbosity: int = 0) -> datetime.datetime: - """ - Return run start time from MyDC. - If not available from MyMDC, retrieve the data from the dataset's metadata - in [directory]/[run] or, if the dataset is older than 2020, from the oldest - file's modified time. + """ Get the directory creation date based on 3 different methods. + + 1) Return run start time from myMDC. (creation_date_metadata_client) + 2) If myMDC connection is not set, + get the date from the files' metadata. (creation_date_file_metadata) + 3) If data files are older than 2020 (dataFormatVersion == "0.5"), + get the date from the oldest file's modified time. If the data is not available from either source, - this function will raise a ValueError. + this function will raise a FileNotFoundError. - :param directory: path to directory which contains runs - :param run: run number + :param directory: path to a directory which contains runs + (e.g. /gpfs/exfel/data/exp/callab/202031/p900113/raw/). + :param run: run number. :param verbosity: Level of verbosity (0 - silent) - :return: (datetime) modification time + :return: creation datetime for the directory. 
""" directory = Path(directory) proposal = int(directory.parent.name[1:]) + directory = directory / 'r{:04d}'.format(run) - md_dict = RunDirectory(directory).run_metadata() - data_fmt_version = md_dict["dataFormatVersion"] + try: + dc = RunDirectory(directory) + except FileNotFoundError as e: + raise FileNotFoundError( + f"- Failed to read creation time, wrong input folder", directory) from e try: - get_run_creation_date_mdc(proposal, run) + return creation_date_metadata_client(proposal, run) except Exception as e: if verbosity > 0: print(e) - directory = directory / 'r{:04d}'.format(run) - - # TODO: is this still needed? - # Loop a number of times to catch stale file handle errors, due to - # migration or gpfs sync. - ntries = 100 - while ntries > 0: - try: - md_dict = open_run(proposal=proposal, run=run).run_metadata() - if md_dict["dataFormatVersion"] != "0.5": - return datetime.datetime.strptime( - md_dict["creationDate"], - "%Y%m%dT%H%M%SZ", - ).replace(tzinfo=datetime.timezone.utc) - else: - return datetime.datetime.fromtimestamp(rfile.stat().st_mtime) - except (IndexError, IOError, ValueError): - ntries -= 1 - return cdate + cdate = creation_date_file_metadata(dc) + + if cdate is not None: + # Exposing the method used for reading the creation_date. + print("Reading creation_date from input files metadata" + " `METADATA/creationDate`") + else: # It's an older dataset. 
+ print("Reading creation_date from last modification data " + "for the oldest input file.") + cdate = datetime.datetime.fromtimestamp( + sorted( + [Path(f.filename) for f in dc.files], + key=path.getmtime + )[0].stat().st_mtime, + tz=datetime.timezone.utc, + ) - msg = 'Could not get the creation time from the directory' - raise ValueError(msg, directory) + return cdate def _init_metadata(constant: 'iCalibrationDB.calibration_constant', diff --git a/tests/conftest.py b/tests/conftest.py index a7cfeb0f1e81716dc083aeed46d6a3e45bb6179b..8b1e2ac4c4beadcaf7b0bbc2d59fe7a3d2d5994b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,13 @@ def pytest_addoption(parser): help="Skips tests marked as requiring GPFS access", ) + parser.addoption( + "--no-mdc", + action="store_true", + default=True, + help="Skips tests marked as requiring myMDC access", + ) + def pytest_configure(config): config.addinivalue_line( @@ -25,6 +32,11 @@ def pytest_configure(config): "requires_caldb(): marks skips for tests that require calDB access", ) + config.addinivalue_line( + "markers", + "requires_mdc(): marks skips for tests that require calDB access", + ) + @lru_cache() def server_reachable(server: str = "max-exfl017"): @@ -46,3 +58,6 @@ def pytest_runtest_setup(item): if list(item.iter_markers(name="requires_caldb")) and not server_reachable(): pytest.skip("caldb not available") + + if list(item.iter_markers(name="requires_mdc")) and item.config.getoption("--no-mdc"): + pytest.skip("myMDC not available") \ No newline at end of file diff --git a/tests/test_cal_tools.py b/tests/test_cal_tools.py index 102f613b7b9f3b9bdb032f5ebe5ab218a8d1e098..243f6f617de639f8fe61e7eec67d2e77f4bfcb80 100644 --- a/tests/test_cal_tools.py +++ b/tests/test_cal_tools.py @@ -5,11 +5,14 @@ from unittest.mock import patch import numpy as np import pytest import zmq +from extra_data import RunDirectory from iCalibrationDB import Conditions, ConstantMetaData, Constants from cal_tools.agipdlib import 
AgipdCorrections, CellRange from cal_tools.plotting import show_processed_modules from cal_tools.tools import ( + creation_date_file_metadata, + creation_date_metadata_client, get_dir_creation_date, get_from_db, get_pdu_from_db, @@ -57,22 +60,47 @@ def test_show_processed_modules(): @pytest.mark.requires_gpfs def test_dir_creation_date(): + """This test is based on not connecting to MDC and failing to use + `creation_date_metadata_client()` + """ folder = '/gpfs/exfel/exp/CALLAB/202031/p900113/raw' date = get_dir_creation_date(folder, 9983) assert isinstance(date, datetime) assert str(date) == '2020-09-23 13:30:50+00:00' - with pytest.raises(ValueError) as e: + # The following data predates the addition of creation_time in metadata + date = get_dir_creation_date(folder, 9999) + assert isinstance(date, datetime) + assert str(date) == '2019-12-16 07:52:25.196603+00:00' + + +@pytest.mark.requires_gpfs +def test_raise_dir_creation_date(): + folder = '/gpfs/exfel/exp/CALLAB/202031/p900113/raw' + + with pytest.raises(FileNotFoundError) as e: get_dir_creation_date(folder, 4) + print(e.value) assert e.value.args[1] == Path(folder) / 'r0004' - # The following data predates the addition of creation_time in metadata - date = get_dir_creation_date(folder, 9999) +@pytest.mark.requires_mdc +def test_creation_date_metadata_client(): + + date = creation_date_metadata_client(900113, 9983) assert isinstance(date, datetime) - assert str(date) == '2019-12-16 08:52:25.196603' + assert str(date) == '2020-09-23 13:30:00+00:00' +@pytest.mark.requires_gpfs +def test_creation_date_file_metadata(): + + folder = '/gpfs/exfel/exp/CALLAB/202031/p900113/raw/r9983' + + date = creation_date_file_metadata(RunDirectory(folder)) + assert isinstance(date, datetime) + assert str(date) == '2020-09-23 13:30:50+00:00' + def _call_get_from_db( constant, condition,