From 1a00309b6d23c2f64ad57fcaee679b7cfb8405fe Mon Sep 17 00:00:00 2001
From: Cyril Danilevski <cyril.danilevski@xfel.eu>
Date: Wed, 10 Feb 2021 15:11:40 +0100
Subject: [PATCH] Fix get_dir_creation_date

We cannot rely on the directory creation date, as it might be modified by filesystem upgrades. File modification dates are preserved, however (unlike creation dates), so we must use the modification time of the oldest file when the information is not available from the run metadata.
---
 cal_tools/cal_tools/tools.py | 14 +++++++-------
 tests/test_cal_tools.py      |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/cal_tools/cal_tools/tools.py b/cal_tools/cal_tools/tools.py
index d88ca857e..5f190d557 100644
--- a/cal_tools/cal_tools/tools.py
+++ b/cal_tools/cal_tools/tools.py
@@ -8,7 +8,7 @@ from os.path import isfile
 from pathlib import Path
 from queue import Queue
 from time import sleep
-from typing import Optional, Union
+from typing import Union
 from urllib.parse import urljoin
 
 import dateutil.parser
@@ -237,8 +237,8 @@ def get_dir_creation_date(directory: Union[str, Path], run: int,
     """
     Return run start time from MyDC.
     If not available from MyMDC, retrieve the data from the dataset's metadata
-    in [directory]/[run] or, if the dataset is older than 2020, from the
-    directory's creation time.
+    in [directory]/[run] or, if the dataset is older than 2020, from the oldest
+    file's modified time.
 
     If the data is not available from either source, this function will raise a
     ValueError.
@@ -267,18 +267,18 @@ def get_dir_creation_date(directory: Union[str, Path], run: int,
     while ntries > 0:
         try:
             rfiles = list(directory.glob('*.h5'))
-            rfiles.sort(key=path.getmtime)
             # get creation time for oldest file,
             # as creation time between run files
-            # should be different only within few seconds
-            with h5py.File(rfiles[0], 'r') as fin:
+            # should differ by a few seconds only.
+            rfile = sorted(rfiles, key=path.getmtime)[0]
+            with h5py.File(rfile, 'r') as fin:
                 cdate = fin['METADATA/creationDate'][0].decode()
                 cdate = datetime.datetime.strptime(cdate, "%Y%m%dT%H%M%SZ")
             return cdate
         except (IndexError, IOError, ValueError):
             ntries -= 1
         except KeyError:  # The files are here, but it's an older dataset
-            return datetime.datetime.fromtimestamp(directory.stat().st_ctime)
+            return datetime.datetime.fromtimestamp(rfile.stat().st_mtime)
 
     msg = 'Could not get the creation time from the directory'
     raise ValueError(msg, directory)
diff --git a/tests/test_cal_tools.py b/tests/test_cal_tools.py
index 208066682..24d61719c 100644
--- a/tests/test_cal_tools.py
+++ b/tests/test_cal_tools.py
@@ -29,4 +29,4 @@ def test_dir_creation_date():
     # The following data predates the addition of creation_time in metadata
     date = get_dir_creation_date(folder, 9999)
     assert isinstance(date, datetime)
-    assert str(date) == '2021-01-25 20:30:52.818820'
+    assert str(date) == '2019-12-16 08:52:25.196603'
-- 
GitLab