Skip to content
Snippets Groups Projects
Commit 1a00309b authored by Cyril Danilevski's avatar Cyril Danilevski :scooter:
Browse files

Fix get_dir_creation_time

We cannot rely on the directory creation date, as it might get modified by filesystem upgrades. File modification dates are preserved, though (unlike creation dates), so we must use these when the information is not available from the run metadata.
parent 507adb34
No related branches found
No related tags found
1 merge request: !421 "Make get_dir_creation_time use files creation dates, not folder"
...@@ -8,7 +8,7 @@ from os.path import isfile ...@@ -8,7 +8,7 @@ from os.path import isfile
from pathlib import Path from pathlib import Path
from queue import Queue from queue import Queue
from time import sleep from time import sleep
from typing import Optional, Union from typing import Union
from urllib.parse import urljoin from urllib.parse import urljoin
import dateutil.parser import dateutil.parser
...@@ -237,8 +237,8 @@ def get_dir_creation_date(directory: Union[str, Path], run: int, ...@@ -237,8 +237,8 @@ def get_dir_creation_date(directory: Union[str, Path], run: int,
""" """
Return run start time from MyDC. Return run start time from MyDC.
If not available from MyMDC, retrieve the data from the dataset's metadata If not available from MyMDC, retrieve the data from the dataset's metadata
in [directory]/[run] or, if the dataset is older than 2020, from the in [directory]/[run] or, if the dataset is older than 2020, from the oldest
directory's creation time. file's modified time.
If the data is not available from either source, this function will raise a If the data is not available from either source, this function will raise a
ValueError. ValueError.
...@@ -267,18 +267,18 @@ def get_dir_creation_date(directory: Union[str, Path], run: int, ...@@ -267,18 +267,18 @@ def get_dir_creation_date(directory: Union[str, Path], run: int,
while ntries > 0: while ntries > 0:
try: try:
rfiles = list(directory.glob('*.h5')) rfiles = list(directory.glob('*.h5'))
rfiles.sort(key=path.getmtime)
# get creation time for oldest file, # get creation time for oldest file,
# as creation time between run files # as creation time between run files
# should be different only within few seconds # should differ by a few seconds only.
with h5py.File(rfiles[0], 'r') as fin: rfile = sorted(rfiles, key=path.getmtime)[0]
with h5py.File(rfile, 'r') as fin:
cdate = fin['METADATA/creationDate'][0].decode() cdate = fin['METADATA/creationDate'][0].decode()
cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ") cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ")
return cdate return cdate
except (IndexError, IOError, ValueError): except (IndexError, IOError, ValueError):
ntries -= 1 ntries -= 1
except KeyError: # The files are here, but it's an older dataset except KeyError: # The files are here, but it's an older dataset
return datetime.datetime.fromtimestamp(directory.stat().st_ctime) return datetime.datetime.fromtimestamp(rfile.stat().st_mtime)
msg = 'Could not get the creation time from the directory' msg = 'Could not get the creation time from the directory'
raise ValueError(msg, directory) raise ValueError(msg, directory)
......
...@@ -29,4 +29,4 @@ def test_dir_creation_date(): ...@@ -29,4 +29,4 @@ def test_dir_creation_date():
# The following data predates the addition of creation_time in metadata # The following data predates the addition of creation_time in metadata
date = get_dir_creation_date(folder, 9999) date = get_dir_creation_date(folder, 9999)
assert isinstance(date, datetime) assert isinstance(date, datetime)
assert str(date) == '2021-01-25 20:30:52.818820' assert str(date) == '2019-12-16 08:52:25.196603'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment