From d59aabb02dd7bd4397fb4ae786ecc83b43fe2c21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Le=20Guyader?= <loic.le.guyader@xfel.eu>
Date: Wed, 8 Apr 2020 17:20:16 +0200
Subject: [PATCH] Keep virtual dataset h5 files closed outside 'with' context

---
 DSSC.py | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/DSSC.py b/DSSC.py
index 52eb8a1..1796ea0 100644
--- a/DSSC.py
+++ b/DSSC.py
@@ -101,7 +101,7 @@ class DSSC:
         print(f'Temporary directory: {self.tempdir}')
 
         print('Creating virtual dataset')
-        self.vdslist = self.create_virtual_dssc_datasets(self.run, path=self.tempdir)
+        self.vds_filenames = self.create_virtual_dssc_datasets(self.run, path=self.tempdir)
 
         # create a dummy scan variable for dark run
         # for other type or run, use DSSC.define_run function to overwrite it
@@ -275,17 +275,25 @@ class DSSC:
 
             input:
                 run: extra-data run
                 path: string where the virtual files are created
+            output:
+                dictionary of key:module, value:virtual dataset filename
         """
-        vds_list = []
-        for m in tqdm(range(16)):
-            vds_filename = os.path.join(path, f'dssc{m}_vds.h5')
-            if os.path.isfile(vds_filename):
-                os.remove(vds_filename)
-            module_vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{m}CH0:xtdf',
-                'image.data', filename=vds_filename)
-            vds_list.append([vds_filename, module_vds])
-        return vds_list
+        vds_filenames = {}
+
+        for module in tqdm(range(16)):
+            fname = os.path.join(path, f'dssc{module}_vds.h5')
+            if os.path.isfile(fname):
+                os.remove(fname)
+
+            vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf',
+                'image.data', filename=fname)
+
+            vds.file.close() # keep h5 file closed outside 'with' context
+
+            vds_filenames[module] = fname
+
+        return vds_filenames
 
     def binning(self, do_pulse_mean=True):
         """ Bin the DSSC data by the predifined scan type (DSSC.define()) using multiprocessing
@@ -306,7 +314,7 @@ class DSSC:
             jobs.append(dict(
                 module=m,
                 fpt=self.fpt,
-                vdf_module=os.path.join(self.tempdir, f'dssc{m}_vds.h5'),
+                vds=self.vds_filenames[m],
                 chunksize=self.chunksize,
                 scan=self.scan['scan_variable'],
                 nbunches=self.nbunches,
@@ -596,7 +604,7 @@ class DSSC:
 def process_one_module(job):
     module = job['module']
     fpt = job['fpt']
-    data_vdf = job['vdf_module']
+    vds = job['vds']
     scan = job['scan']
     chunksize = job['chunksize']
     nbunches = job['nbunches']
@@ -604,8 +612,11 @@ def process_one_module(job):
     image_path = f'INSTRUMENT/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/data'
     npulse_path = f'INDEX/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/count'
 
-    with h5py.File(data_vdf, 'r') as m:
+    with h5py.File(vds, 'r') as m:
         all_trainIds = m['INDEX/trainId'][()]
+        frames_per_train = m[npulse_path][()]
+        trains_with_data = all_trainIds[frames_per_train == fpt]
+
     n_trains = len(all_trainIds)
 
     chunk_start = np.arange(n_trains, step=chunksize, dtype=int)
@@ -632,14 +643,7 @@ def process_one_module(job):
     module_data['module'] = module
 
     # crunching
-    with h5py.File(data_vdf, 'r') as m:
-        #fpt_calc = int(len(m[image_path]) / n_trains)
-        #assert fpt_calc == fpt, f'data length does not match expected value (module {module})'
-        all_trainIds = m['INDEX/trainId'][()]
-        frames_per_train = m[npulse_path][()]
-        trains_with_data = all_trainIds[frames_per_train == fpt]
-        #print(np.unique(pulses_per_train), '/', fpt)
-        #print(len(trains_with_data))
+    with h5py.File(vds, 'r') as m:
         chunk_start = np.arange(len(all_trainIds), step=chunksize, dtype=int)
 
     trains_start = 0
-- 
GitLab
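
Note on the idiom this patch adopts: the dataset object returned by
get_virtual_dataset() keeps its parent HDF5 file open, so closing
dataset.file is what lets create_virtual_dssc_datasets() hand back plain
filenames. A minimal, self-contained sketch of the same
create-close-reopen pattern in plain h5py (the file names source0.h5 and
vds.h5 are made up for illustration):

    import h5py
    import numpy as np

    # Source file for the virtual dataset to point at.
    with h5py.File('source0.h5', 'w') as f:
        f['data'] = np.arange(10, dtype='i8')

    # Build the virtual dataset file.
    layout = h5py.VirtualLayout(shape=(10,), dtype='i8')
    layout[:] = h5py.VirtualSource('source0.h5', 'data', shape=(10,))

    f = h5py.File('vds.h5', 'w')
    vds = f.create_virtual_dataset('data', layout)
    vds.file.close()  # same idiom as the patch: only the filename escapes

    # Readers reopen by name, so the handle lives only inside 'with'.
    with h5py.File('vds.h5', 'r') as m:
        print(m['data'][()])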
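
Passing filenames rather than open datasets into the job dicts matters
because open h5py objects cannot be pickled into multiprocessing workers;
each worker opens and closes its own handle, as process_one_module() now
does. A sketch of that worker-side pattern, assuming the per-module
dssc{m}_vds.h5 files created above already exist and with count_trains()
as a hypothetical stand-in for the real crunching:

    import multiprocessing as mp
    import h5py

    def count_trains(job):
        # Each worker opens and closes its own handle; only the plain
        # filename in the job dict crosses the process boundary.
        with h5py.File(job['vds'], 'r') as m:
            return job['module'], len(m['INDEX/trainId'])

    if __name__ == '__main__':
        jobs = [{'module': m, 'vds': f'dssc{m}_vds.h5'} for m in range(16)]
        with mp.Pool(4) as pool:
            for module, n_trains in pool.map(count_trains, jobs):
                print(f'module {module}: {n_trains} trains')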