From 90b592c623a092a39cba59e929b6edbe5542efa7 Mon Sep 17 00:00:00 2001 From: Cyril Danilevski <cydanil@gmail.com> Date: Thu, 8 Oct 2020 12:21:13 +0200 Subject: [PATCH] Sanitize cal_tools.tools.get_creation_date --- cal_tools/cal_tools/tools.py | 44 +++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/cal_tools/cal_tools/tools.py b/cal_tools/cal_tools/tools.py index 0f46f959e..a22a89cb1 100644 --- a/cal_tools/cal_tools/tools.py +++ b/cal_tools/cal_tools/tools.py @@ -2,8 +2,9 @@ from collections import OrderedDict import datetime from glob import glob import json -from os import environ, listdir, path, stat -from os.path import isfile, splitext +from os import environ, listdir, path +from os.path import isfile +from pathlib import Path from queue import Queue import re from time import sleep @@ -229,47 +230,54 @@ def get_run_info(proposal, run): def get_dir_creation_date(directory: str, run: int, - tsdir: Optional[bool] = False, - verbosity: Optional[int] = 0): + verbosity: Optional[int] = 0) -> datetime.datetime: """ Return run starting time from the MDC. If not succeeded, return modification time of oldest file.h5 in [directory]/[run]04. + If the files are not available, this function will raise a ValueError. + :param directory: path to directory which contains runs :param run: run number :param tsdir: to get modification time of [directory]/[run]04. :param verbosity: Level of verbosity (0 - silent) :return: (datetime) modification time + """ + directory = Path(directory) + proposal = int(directory.parent.name[1:]) try: - path_list = list(filter(None, directory.strip('/').split('/'))) - proposal = int(path_list[-2][1:]) run_info = get_run_info(proposal, run) return dateutil.parser.parse(run_info['begin_at']) except Exception as e: if verbosity > 0: print(e) + directory = directory / 'r{:04d}'.format(run) + + # Loop a number of times to catch stale file handle errors, due to + # migration or gpfs sync. ntries = 100 while ntries > 0: try: - if tsdir: - creation_time = stat("{}/r{:04d}".format(directory, run)).st_mtime - else: - rfiles = glob("{}/r{:04d}/*.h5".format(directory, run)) - rfiles.sort(key=path.getmtime) - creation_time = stat(rfiles[0]).st_mtime - - creation_time = datetime.datetime.fromtimestamp(creation_time) - return creation_time - except: # catch stale file handle errors etc and try again + dates = [] + for f in directory.glob('*.h5'): + with h5py.File(f) as fin: + cdate = fin['METADATA/creationDate'][0].decode() + cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ") + dates.append(cdate) + return min(dates) + except IOError: ntries -= 1 + msg = 'Could not get the creation time from the directory' + raise ValueError(msg, directory) + def save_const_to_h5(device, constant, condition, data, - file_loc, creation_time, out_folder): + file_loc, creation_time, out_folder): """ Save constant in h5 file with its metadata (e.g. db_module, condition, creation_time) @@ -280,7 +288,7 @@ def save_const_to_h5(device, constant, condition, data, :type constant: iCalibrationDB.know_constants object :param condition: Calibration condition :type condition: iCalibrationDB.know_detector_conditions object - :param data: Constant data to save + :param data: Constant data to save :type data: ndarray :param file_loc: Location of raw data "proposal:{} runs:{} {} {}" :type file_loc: str -- GitLab