Skip to content
Snippets Groups Projects
Commit e235e1e4 authored by Cyril Danilevski :scooter:
Browse files

Retrieve dataset creation date from metadata, if available.

See merge request detectors/pycalibration!364
parents dcc2d492 3ae9df45
No related branches found
No related tags found
1 merge request: !364 Sanitize cal_tools.tools.get_creation_date
...@@ -2,8 +2,9 @@ from collections import OrderedDict ...@@ -2,8 +2,9 @@ from collections import OrderedDict
import datetime import datetime
from glob import glob from glob import glob
import json import json
from os import environ, listdir, path, stat from os import environ, listdir, path
from os.path import isfile, splitext from os.path import isfile
from pathlib import Path
from queue import Queue from queue import Queue
import re import re
from time import sleep from time import sleep
...@@ -11,6 +12,7 @@ from typing import Optional ...@@ -11,6 +12,7 @@ from typing import Optional
from urllib.parse import urljoin from urllib.parse import urljoin
import dateutil.parser import dateutil.parser
import h5py
import ipykernel import ipykernel
from metadata_client.metadata_client import MetadataClient from metadata_client.metadata_client import MetadataClient
from notebook.notebookapp import list_running_servers from notebook.notebookapp import list_running_servers
...@@ -229,47 +231,57 @@ def get_run_info(proposal, run): ...@@ -229,47 +231,57 @@ def get_run_info(proposal, run):
def get_dir_creation_date(directory: str, run: int, def get_dir_creation_date(directory: str, run: int,
tsdir: Optional[bool] = False, verbosity: Optional[int] = 0) -> datetime.datetime:
verbosity: Optional[int] = 0):
""" """
Return run starting time from the MDC. Return run start time from MyMDC.
If not succeeded, return modification time of oldest file.h5 If not available from MyMDC, retrieve the data from the dataset's metadata
in [directory]/[run]04. in [directory]/[run] or, if the dataset is older than 2020, from the
directory's creation time.
If the data is not available from either source, this function will raise a
ValueError.
:param directory: path to directory which contains runs :param directory: path to directory which contains runs
:param run: run number :param run: run number
:param tsdir: to get modification time of [directory]/[run]04.
:param verbosity: Level of verbosity (0 - silent) :param verbosity: Level of verbosity (0 - silent)
:return: (datetime) modification time :return: (datetime) modification time
""" """
directory = Path(directory)
proposal = int(directory.parent.name[1:])
try: try:
path_list = list(filter(None, directory.strip('/').split('/')))
proposal = int(path_list[-2][1:])
run_info = get_run_info(proposal, run) run_info = get_run_info(proposal, run)
return dateutil.parser.parse(run_info['begin_at']) return dateutil.parser.parse(run_info['begin_at'])
except Exception as e: except Exception as e:
if verbosity > 0: if verbosity > 0:
print(e) print(e)
directory = directory / 'r{:04d}'.format(run)
# Loop a number of times to catch stale file handle errors, due to
# migration or gpfs sync.
ntries = 100 ntries = 100
while ntries > 0: while ntries > 0:
try: try:
if tsdir: dates = []
creation_time = stat("{}/r{:04d}".format(directory, run)).st_mtime for f in directory.glob('*.h5'):
else: with h5py.File(f, 'r') as fin:
rfiles = glob("{}/r{:04d}/*.h5".format(directory, run)) cdate = fin['METADATA/creationDate'][0].decode()
rfiles.sort(key=path.getmtime) cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ")
creation_time = stat(rfiles[0]).st_mtime dates.append(cdate)
return min(dates)
creation_time = datetime.datetime.fromtimestamp(creation_time) except (IOError, ValueError):
return creation_time
except: # catch stale file handle errors etc and try again
ntries -= 1 ntries -= 1
except KeyError: # The files are here, but it's an older dataset
return datetime.datetime.fromtimestamp(directory.stat().st_ctime)
msg = 'Could not get the creation time from the directory'
raise ValueError(msg, directory)
def save_const_to_h5(device, constant, condition, data, def save_const_to_h5(device, constant, condition, data,
file_loc, creation_time, out_folder): file_loc, creation_time, out_folder):
""" """
Save constant in h5 file with its metadata Save constant in h5 file with its metadata
(e.g. db_module, condition, creation_time) (e.g. db_module, condition, creation_time)
...@@ -280,7 +292,7 @@ def save_const_to_h5(device, constant, condition, data, ...@@ -280,7 +292,7 @@ def save_const_to_h5(device, constant, condition, data,
:type constant: iCalibrationDB.know_constants object :type constant: iCalibrationDB.know_constants object
:param condition: Calibration condition :param condition: Calibration condition
:type condition: iCalibrationDB.know_detector_conditions object :type condition: iCalibrationDB.know_detector_conditions object
:param data: Constant data to save :param data: Constant data to save
:type data: ndarray :type data: ndarray
:param file_loc: Location of raw data "proposal:{} runs:{} {} {}" :param file_loc: Location of raw data "proposal:{} runs:{} {} {}"
:type file_loc: str :type file_loc: str
......
from datetime import datetime
from pathlib import Path
import pytest
from cal_tools.tools import get_dir_creation_date
def test_dir_creation_date():
    """Check get_dir_creation_date against runs stored under /gpfs/exfel/exp.

    Three cases are covered:

    * run 10 of p900172 must yield a ``datetime`` that prints as
      '2020-07-20 10:39:03';
    * run 4 of the same proposal must raise ``ValueError`` whose second
      argument is the run directory ``<raw folder>/r0004``;
    * run 365 of p900075, recorded before creation_time was added to the
      metadata, must yield '2019-07-04 11:02:41.280000'.

    The paths are hard-coded, so the test needs access to that file system.
    """
    raw_dir = '/gpfs/exfel/exp/DETLAB/202031/p900172/raw'

    created = get_dir_creation_date(raw_dir, 10)
    assert isinstance(created, datetime)
    assert str(created) == '2020-07-20 10:39:03'

    # The failure must carry the run directory as the second exception arg.
    with pytest.raises(ValueError) as excinfo:
        get_dir_creation_date(raw_dir, 4)
    assert excinfo.value.args[1] == Path(raw_dir) / 'r0004'

    # The following data predates the addition of creation_time in metadata
    raw_dir = '/gpfs/exfel/exp/SQS/201930/p900075/raw/'

    created = get_dir_creation_date(raw_dir, 365)
    assert isinstance(created, datetime)
    assert str(created) == '2019-07-04 11:02:41.280000'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment