diff --git a/cal_tools/cal_tools/tools.py b/cal_tools/cal_tools/tools.py index 0f46f959ec4433b264c5591f08ed5c1bdeb572a4..5f1170d52d52f63d55ed8a535f24e52d435c1e55 100644 --- a/cal_tools/cal_tools/tools.py +++ b/cal_tools/cal_tools/tools.py @@ -2,8 +2,9 @@ from collections import OrderedDict import datetime from glob import glob import json -from os import environ, listdir, path, stat -from os.path import isfile, splitext +from os import environ, listdir, path +from os.path import isfile +from pathlib import Path from queue import Queue import re from time import sleep @@ -11,6 +12,7 @@ from typing import Optional from urllib.parse import urljoin import dateutil.parser +import h5py import ipykernel from metadata_client.metadata_client import MetadataClient from notebook.notebookapp import list_running_servers @@ -229,47 +231,57 @@ def get_run_info(proposal, run): def get_dir_creation_date(directory: str, run: int, - tsdir: Optional[bool] = False, - verbosity: Optional[int] = 0): + verbosity: Optional[int] = 0) -> datetime.datetime: """ - Return run starting time from the MDC. - If not succeeded, return modification time of oldest file.h5 - in [directory]/[run]04. + Return run start time from MyMDC. + If not available from MyMDC, retrieve the data from the dataset's metadata + in [directory]/[run] or, if the dataset is older than 2020, from the + directory's creation time. + + If the data is not available from either source, this function will raise a + ValueError. :param directory: path to directory which contains runs :param run: run number - :param tsdir: to get modification time of [directory]/[run]04. 
:param verbosity: Level of verbosity (0 - silent) :return: (datetime) modification time + """ + directory = Path(directory) + proposal = int(directory.parent.name[1:]) try: - path_list = list(filter(None, directory.strip('/').split('/'))) - proposal = int(path_list[-2][1:]) run_info = get_run_info(proposal, run) return dateutil.parser.parse(run_info['begin_at']) except Exception as e: if verbosity > 0: print(e) + directory = directory / 'r{:04d}'.format(run) + + # Loop a number of times to catch stale file handle errors, due to + # migration or gpfs sync. ntries = 100 while ntries > 0: try: - if tsdir: - creation_time = stat("{}/r{:04d}".format(directory, run)).st_mtime - else: - rfiles = glob("{}/r{:04d}/*.h5".format(directory, run)) - rfiles.sort(key=path.getmtime) - creation_time = stat(rfiles[0]).st_mtime - - creation_time = datetime.datetime.fromtimestamp(creation_time) - return creation_time - except: # catch stale file handle errors etc and try again + dates = [] + for f in directory.glob('*.h5'): + with h5py.File(f, 'r') as fin: + cdate = fin['METADATA/creationDate'][0].decode() + cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ") + dates.append(cdate) + return min(dates) + except (IOError, ValueError): ntries -= 1 + except KeyError: # The files are here, but it's an older dataset + return datetime.datetime.fromtimestamp(directory.stat().st_ctime) + + msg = 'Could not get the creation time from the directory' + raise ValueError(msg, directory) def save_const_to_h5(device, constant, condition, data, - file_loc, creation_time, out_folder): + file_loc, creation_time, out_folder): """ Save constant in h5 file with its metadata (e.g. 
db_module, condition, creation_time) @@ -280,7 +292,7 @@ def save_const_to_h5(device, constant, condition, data, :type constant: iCalibrationDB.know_constants object :param condition: Calibration condition :type condition: iCalibrationDB.know_detector_conditions object - :param data: Constant data to save + :param data: Constant data to save :type data: ndarray :param file_loc: Location of raw data "proposal:{} runs:{} {} {}" :type file_loc: str diff --git a/tests/test_cal_tools.py b/tests/test_cal_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..865eb90c46705d8cd858eb5530a999cbda2b24e7 --- /dev/null +++ b/tests/test_cal_tools.py @@ -0,0 +1,24 @@ +from datetime import datetime +from pathlib import Path + +import pytest +from cal_tools.tools import get_dir_creation_date + + +def test_dir_creation_date(): + folder = '/gpfs/exfel/exp/DETLAB/202031/p900172/raw' + + date = get_dir_creation_date(folder, 10) + assert isinstance(date, datetime) + assert str(date) == '2020-07-20 10:39:03' + + with pytest.raises(ValueError) as e: + get_dir_creation_date(folder, 4) + assert e.value.args[1] == Path(folder) / 'r0004' + + # The following data predates the addition of creationDate in metadata + folder = '/gpfs/exfel/exp/SQS/201930/p900075/raw/' + + date = get_dir_creation_date(folder, 365) + assert isinstance(date, datetime) + assert str(date) == '2019-07-04 11:02:41.280000'