From 8ef805e14a0aded30db1b74f839b098fae11f832 Mon Sep 17 00:00:00 2001 From: ahmedk <karim.ahmed@xfel.eu> Date: Fri, 18 Feb 2022 07:45:21 +0100 Subject: [PATCH] first draft for fixing get_dir_creation_date --- src/cal_tools/tools.py | 45 +++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/src/cal_tools/tools.py b/src/cal_tools/tools.py index 9a20cfe1d..2826ac047 100644 --- a/src/cal_tools/tools.py +++ b/src/cal_tools/tools.py @@ -273,6 +273,19 @@ def get_run_info(proposal, run): return resp.json() +def get_run_creation_date_mdc(proposal, run): + """ + Get run directory creation date from myMDC using metadata client. + using method `get_proposal_runs`. + """ + run_info = get_run_info(proposal, run) + return datetime.datetime.strptime( + run_info['begin_at'], "%Y:%m:%dT%H:%M:%SZ", + ).replace(tzinfo=datetime.timezone.utc) + +def get_run_metadata(proposal, run): + + def get_dir_creation_date(directory: Union[str, Path], run: int, verbosity: int = 0) -> datetime.datetime: """ @@ -291,37 +304,37 @@ def get_dir_creation_date(directory: Union[str, Path], run: int, """ directory = Path(directory) + proposal = int(directory.parent.name[1:]) + md_dict = RunDirectory(directory).run_metadata() + data_fmt_version = md_dict["dataFormatVersion"] + try: - run_info = get_run_info(proposal, run) - return dateutil.parser.parse(run_info['begin_at']) + get_run_creation_date_mdc(proposal, run) except Exception as e: if verbosity > 0: print(e) directory = directory / 'r{:04d}'.format(run) - + + # TODO: is this still needed? # Loop a number of times to catch stale file handle errors, due to # migration or gpfs sync. ntries = 100 while ntries > 0: try: - rfiles = list(directory.glob('*.h5')) - # get creation time for oldest file, - # as creation time between run files - # should differ by a few seconds only. - rfile = sorted(rfiles, key=path.getmtime)[0] - with h5py.File(rfile, 'r') as fin: - cdate = fin['METADATA/creationDate'][0].decode() - cdate = datetime.datetime.strptime( - cdate, - "%Y%m%dT%H%M%SZ").replace(tzinfo=datetime.timezone.utc) - return cdate + md_dict = open_run(proposal=proposal, run=run).run_metadata() + if md_dict["dataFormatVersion"] != "0.5": + return datetime.datetime.strptime( + md_dict["creationDate"], + "%Y%m%dT%H%M%SZ", + ).replace(tzinfo=datetime.timezone.utc) + else: + return datetime.datetime.fromtimestamp(rfile.stat().st_mtime) except (IndexError, IOError, ValueError): ntries -= 1 - except KeyError: # The files are here, but it's an older dataset - return datetime.datetime.fromtimestamp(rfile.stat().st_mtime) + return cdate msg = 'Could not get the creation time from the directory' raise ValueError(msg, directory) -- GitLab