From d59aabb02dd7bd4397fb4ae786ecc83b43fe2c21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Le=20Guyader?= <loic.le.guyader@xfel.eu>
Date: Wed, 8 Apr 2020 17:20:16 +0200
Subject: [PATCH] Keep virtual dataset h5 files closed outside 'with' context

---
 DSSC.py | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/DSSC.py b/DSSC.py
index 52eb8a1..1796ea0 100644
--- a/DSSC.py
+++ b/DSSC.py
@@ -101,7 +101,7 @@ class DSSC:
         print(f'Temporary directory: {self.tempdir}')
 
         print('Creating virtual dataset')
-        self.vdslist = self.create_virtual_dssc_datasets(self.run, path=self.tempdir)
+        self.vds_filenames = self.create_virtual_dssc_datasets(self.run, path=self.tempdir)
 
         # create a dummy scan variable for dark run
         # for other type or run, use DSSC.define_run function to overwrite it
@@ -275,17 +275,25 @@ class DSSC:
 
             input:
                 run: extra-data run
                 path: string where the virtual files are created
+            output:
+                dictionary of key:module, value:virtual dataset filename
         """
-        vds_list = []
-        for m in tqdm(range(16)):
-            vds_filename = os.path.join(path, f'dssc{m}_vds.h5')
-            if os.path.isfile(vds_filename):
-                os.remove(vds_filename)
-            module_vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{m}CH0:xtdf',
-                'image.data', filename=vds_filename)
-            vds_list.append([vds_filename, module_vds])
-        return vds_list
+        vds_filenames = {}
+
+        for module in tqdm(range(16)):
+            fname = os.path.join(path, f'dssc{module}_vds.h5')
+            if os.path.isfile(fname):
+                os.remove(fname)
+
+            vds = run.get_virtual_dataset(f'SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf',
+                'image.data', filename=fname)
+
+            vds.file.close() # keep h5 file closed outside 'with' context
+
+            vds_filenames[module] = fname
+
+        return vds_filenames
 
     def binning(self, do_pulse_mean=True):
         """ Bin the DSSC data by the predifined scan type (DSSC.define()) using multiprocessing
@@ -306,7 +314,7 @@ class DSSC:
             jobs.append(dict(
                 module=m,
                 fpt=self.fpt,
-                vdf_module=os.path.join(self.tempdir, f'dssc{m}_vds.h5'),
+                vds=self.vds_filenames[m],
                 chunksize=self.chunksize,
                 scan=self.scan['scan_variable'],
                 nbunches=self.nbunches,
@@ -596,7 +604,7 @@ class DSSC:
 def process_one_module(job):
     module = job['module']
     fpt = job['fpt']
-    data_vdf = job['vdf_module']
+    vds = job['vds']
     scan = job['scan']
     chunksize = job['chunksize']
     nbunches = job['nbunches']
@@ -604,8 +612,11 @@ def process_one_module(job):
     image_path = f'INSTRUMENT/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/data'
     npulse_path = f'INDEX/SCS_DET_DSSC1M-1/DET/{module}CH0:xtdf/image/count'
 
-    with h5py.File(data_vdf, 'r') as m:
+    with h5py.File(vds, 'r') as m:
         all_trainIds = m['INDEX/trainId'][()]
+        frames_per_train = m[npulse_path][()]
+        trains_with_data = all_trainIds[frames_per_train == fpt]
+
     n_trains = len(all_trainIds)
 
     chunk_start = np.arange(n_trains, step=chunksize, dtype=int)
@@ -632,14 +643,7 @@ def process_one_module(job):
     module_data['module'] = module
 
     # crunching
-    with h5py.File(data_vdf, 'r') as m:
-        #fpt_calc = int(len(m[image_path]) / n_trains)
-        #assert fpt_calc == fpt, f'data length does not match expected value (module {module})'
-        all_trainIds = m['INDEX/trainId'][()]
-        frames_per_train = m[npulse_path][()]
-        trains_with_data = all_trainIds[frames_per_train == fpt]
-        #print(np.unique(pulses_per_train), '/', fpt)
-        #print(len(trains_with_data))
+    with h5py.File(vds, 'r') as m:
         chunk_start = np.arange(len(all_trainIds), step=chunksize, dtype=int)
 
     trains_start = 0
-- 
GitLab
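
Note on the idiom this patch adopts: the dataset object returned by
get_virtual_dataset() keeps its parent HDF5 file open, so closing
dataset.file is what lets create_virtual_dssc_datasets() hand back plain
filenames. A minimal, self-contained sketch of the same
create-close-reopen pattern in plain h5py (the file names source0.h5 and
vds.h5 are made up for illustration):

    import h5py
    import numpy as np

    # Source file for the virtual dataset to point at.
    with h5py.File('source0.h5', 'w') as f:
        f['data'] = np.arange(10, dtype='i8')

    # Build the virtual dataset file.
    layout = h5py.VirtualLayout(shape=(10,), dtype='i8')
    layout[:] = h5py.VirtualSource('source0.h5', 'data', shape=(10,))

    f = h5py.File('vds.h5', 'w')
    vds = f.create_virtual_dataset('data', layout)
    vds.file.close()  # same idiom as the patch: only the filename escapes

    # Readers reopen by name, so the handle lives only inside 'with'.
    with h5py.File('vds.h5', 'r') as m:
        print(m['data'][()])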
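
Passing filenames rather than open datasets into the job dicts matters
because open h5py objects cannot be pickled into multiprocessing workers;
each worker opens and closes its own handle, as process_one_module() now
does. A sketch of that worker-side pattern, assuming the per-module
dssc{m}_vds.h5 files created above already exist and with count_trains()
as a hypothetical stand-in for the real crunching:

    import multiprocessing as mp
    import h5py

    def count_trains(job):
        # Each worker opens and closes its own handle; only the plain
        # filename in the job dict crosses the process boundary.
        with h5py.File(job['vds'], 'r') as m:
            return job['module'], len(m['INDEX/trainId'])

    if __name__ == '__main__':
        jobs = [{'module': m, 'vds': f'dssc{m}_vds.h5'} for m in range(16)]
        with mp.Pool(4) as pool:
            for module, n_trains in pool.map(count_trains, jobs):
                print(f'module {module}: {n_trains} trains')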