From 2049c42562de8a0472626a2d00b6d4c187945649 Mon Sep 17 00:00:00 2001 From: Philipp Schmidt <philipp.schmidt@xfel.eu> Date: Mon, 16 May 2022 14:45:07 +0200 Subject: [PATCH] Reorder writes in LPD correct for faster access --- notebooks/LPD/LPD_Correct_Fast.ipynb | 7 +++---- src/cal_tools/files.py | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/notebooks/LPD/LPD_Correct_Fast.ipynb b/notebooks/LPD/LPD_Correct_Fast.ipynb index c9e80f472..bbd744301 100644 --- a/notebooks/LPD/LPD_Correct_Fast.ipynb +++ b/notebooks/LPD/LPD_Correct_Fast.ipynb @@ -423,22 +423,21 @@ " train_ids=dc.train_ids,\n", " timestamps=fa.file['INDEX/timestamp'][sel_trains],\n", " flags=fa.validity_flag[sel_trains])\n", + " outp_file.create_metadata(like=dc, instrument_channels=(f'{outp_source_name}/image',))\n", " \n", " outp_source = outp_file.create_instrument_source(outp_source_name)\n", " \n", " outp_source.create_index(image=image_counts)\n", - " outp_source.create_key('image.data', data=out_data,\n", - " chunks=(chunks_data, 256, 256))\n", " outp_source.create_key('image.cellId', data=in_cell,\n", " chunks=(chunks_ids,))\n", " outp_source.create_key('image.pulseId', data=in_pulse,\n", " chunks=(chunks_ids,))\n", + " outp_source.create_key('image.data', data=out_data,\n", + " chunks=(chunks_data, 256, 256))\n", " write_compressed_frames(\n", " out_gain, outp_file, f'INSTRUMENT/{outp_source_name}/image/gain', comp_threads=8)\n", " write_compressed_frames(\n", " out_mask, outp_file, f'INSTRUMENT/{outp_source_name}/image/mask', comp_threads=8)\n", - " \n", - " outp_file.create_metadata(like=dc)\n", " write_time = perf_counter() - start\n", " \n", " total_time = open_time + read_time + correct_time + write_time\n", diff --git a/src/cal_tools/files.py b/src/cal_tools/files.py index c24a31b45..57beac95a 100644 --- a/src/cal_tools/files.py +++ b/src/cal_tools/files.py @@ -242,7 +242,7 @@ class DataFile(h5py.File): creation_date=None, update_date=None, proposal=0, run=None, sequence=None, daq_library='1.x', karabo_framework='2.x', control_sources=(), - instrument_sources=()): + instrument_channels=()): """Create METADATA datasets. Args: @@ -267,9 +267,10 @@ class DataFile(h5py.File): control_sources (Iterable, optional): Control sources in this file, sources created via create_control_source are automatically included. - instrument_sources (Iterable, optional): Instrument sources - in this file, sources created via - create_instrument_source are automatically included. + instrument_channels (Iterable, optional): Instrument + channels (source and first component of data hash) in + this file, channels created via create_instrument_source + are automatically included. Returns: None @@ -307,11 +308,13 @@ class DataFile(h5py.File): sources = {name: 'CONTROL' for name in chain(self.__control_sources, control_sources)} - # Expand known and specified instrument sources with their - # channels. + # Add in the specified instrument data channels. + sources.update({full_channel: 'INSTRUMENT' + for full_channel in instrument_channels}) + + # Add in those already in the file, if not already passed. sources.update({f'{name}/{channel}': 'INSTRUMENT' - for name in chain(self.__instrument_sources, - instrument_sources) + for name in self.__instrument_sources for channel in self[f'INSTRUMENT/{name}']}) source_names = sorted(sources.keys()) -- GitLab