Skip to content
Snippets Groups Projects
Commit d59aabb0 authored by Loïc Le Guyader's avatar Loïc Le Guyader
Browse files

Keep virtual dataset h5 files closed outside 'with' context

parent ce89f22a
No related branches found
No related tags found
1 merge request!76Extra data and xfel kernel
...@@ -101,7 +101,7 @@ class DSSC: ...@@ -101,7 +101,7 @@ class DSSC:
print(f'Temporary directory: {self.tempdir}') print(f'Temporary directory: {self.tempdir}')
print('Creating virtual dataset') print('Creating virtual dataset')
self.vdslist = self.create_virtual_dssc_datasets(self.run, path=self.tempdir) self.vds_filenames = self.create_virtual_dssc_datasets(self.run, path=self.tempdir)
# create a dummy scan variable for dark run # create a dummy scan variable for dark run
# for other type of run, use DSSC.define_run function to overwrite it # for other type of run, use DSSC.define_run function to overwrite it
...@@ -275,17 +275,25 @@ class DSSC: ...@@ -275,17 +275,25 @@ class DSSC:
input: input:
run: extra-data run run: extra-data run
path: string where the virtual files are created path: string where the virtual files are created
output:
dictionary of key:module, value:virtual dataset filename
""" """
vds_list = [] vds_filenames = {}
for m in tqdm(range(16)):
vds_filename = os.path.join(path, f'dssc{m}_vds.h5') for module in tqdm(range(16)):
if os.path.isfile(vds_filename): fname = os.path.join(path, f'dssc{module}_vds.h5')
os.remove(vds_filename) if os.path.isfile(fname):
module_vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{m}CH0:xtdf', os.remove(fname)
'image.data', filename=vds_filename)
vds_list.append([vds_filename, module_vds]) vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf',
return vds_list 'image.data', filename=fname)
vds.file.close() # keep h5 file closed outside 'with' context
vds_filenames[module] = fname
return vds_filenames
def binning(self, do_pulse_mean=True): def binning(self, do_pulse_mean=True):
""" Bin the DSSC data by the predifined scan type (DSSC.define()) using multiprocessing """ Bin the DSSC data by the predifined scan type (DSSC.define()) using multiprocessing
...@@ -306,7 +314,7 @@ class DSSC: ...@@ -306,7 +314,7 @@ class DSSC:
jobs.append(dict( jobs.append(dict(
module=m, module=m,
fpt=self.fpt, fpt=self.fpt,
vdf_module=os.path.join(self.tempdir, f'dssc{m}_vds.h5'), vds=self.vds_filenames[m],
chunksize=self.chunksize, chunksize=self.chunksize,
scan=self.scan['scan_variable'], scan=self.scan['scan_variable'],
nbunches=self.nbunches, nbunches=self.nbunches,
...@@ -596,7 +604,7 @@ class DSSC: ...@@ -596,7 +604,7 @@ class DSSC:
def process_one_module(job): def process_one_module(job):
module = job['module'] module = job['module']
fpt = job['fpt'] fpt = job['fpt']
data_vdf = job['vdf_module'] vds = job['vds']
scan = job['scan'] scan = job['scan']
chunksize = job['chunksize'] chunksize = job['chunksize']
nbunches = job['nbunches'] nbunches = job['nbunches']
...@@ -604,8 +612,11 @@ def process_one_module(job): ...@@ -604,8 +612,11 @@ def process_one_module(job):
image_path = f'INSTRUMENT/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/data' image_path = f'INSTRUMENT/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/data'
npulse_path = f'INDEX/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/count' npulse_path = f'INDEX/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/count'
with h5py.File(data_vdf, 'r') as m: with h5py.File(vds, 'r') as m:
all_trainIds = m['INDEX/trainId'][()] all_trainIds = m['INDEX/trainId'][()]
frames_per_train = m[npulse_path][()]
trains_with_data = all_trainIds[frames_per_train == fpt]
n_trains = len(all_trainIds) n_trains = len(all_trainIds)
chunk_start = np.arange(n_trains, step=chunksize, dtype=int) chunk_start = np.arange(n_trains, step=chunksize, dtype=int)
...@@ -632,14 +643,7 @@ def process_one_module(job): ...@@ -632,14 +643,7 @@ def process_one_module(job):
module_data['module'] = module module_data['module'] = module
# crunching # crunching
with h5py.File(data_vdf, 'r') as m: with h5py.File(vds, 'r') as m:
#fpt_calc = int(len(m[image_path]) / n_trains)
#assert fpt_calc == fpt, f'data length does not match expected value (module {module})'
all_trainIds = m['INDEX/trainId'][()]
frames_per_train = m[npulse_path][()]
trains_with_data = all_trainIds[frames_per_train == fpt]
#print(np.unique(pulses_per_train), '/', fpt)
#print(len(trains_with_data))
chunk_start = np.arange(len(all_trainIds), step=chunksize, dtype=int) chunk_start = np.arange(len(all_trainIds), step=chunksize, dtype=int)
trains_start = 0 trains_start = 0
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment