def create_legacy_source(self, legacy_source, target_source):
    """Expose an instrument source under an additional legacy name.

    Soft links named after the legacy source are inserted into the
    INSTRUMENT and INDEX groups of this file, each pointing at the
    group of the target source in the same section. This makes the
    target's data reachable under a second name, primarily for
    backwards compatibility.

    Args:
        legacy_source (str): Name under which the links are created.
        target_source (str or InstrumentSource): Source the links
            point to, given either by name or as a source object.

    Returns:
        None

    Raises:
        ValueError: If target_source is neither a str nor an
            InstrumentSource.
    """

    if isinstance(target_source, InstrumentSource):
        # Drop the leading character of the group name and everything
        # up to and including the next '/'; presumably the name has
        # the form '/INSTRUMENT/<source>', leaving only '<source>'.
        _, _, target_source = target_source.name[1:].partition('/')
    elif not isinstance(target_source, str):
        raise ValueError('target_source must be str or InstrumentSource')

    # Same link layout in both sections, INSTRUMENT first, then INDEX.
    for section in ('INSTRUMENT', 'INDEX'):
        self.file[f'/{section}/{legacy_source}'] = h5py.SoftLink(
            f'/{section}/{target_source}')

    # Record the legacy name alongside the other instrument sources.
    self.__instrument_sources.add(legacy_source)
[sd for sd in sources + if sd.is_instrument and not sd.is_legacy]) after_instrument = perf_counter() after_close = perf_counter() @@ -150,6 +152,7 @@ class SourceDataWriter: train_ids, *index_dsets = get_index_root_data(sources) control_indices, instrument_indices = build_sources_index(sources) + legacy_sources, legacy_source_channels = get_legacy_sources(sources) f.create_metadata( like=sources[0], @@ -159,7 +162,9 @@ class SourceDataWriter: instrument_channels=[ f'{source}/{index_group}' for source, index_group_counts in instrument_indices.items() - for index_group in index_group_counts.keys()]) + for index_group in index_group_counts.keys() + ] + legacy_source_channels + ) f.create_dataset('METADATA/dataWriter', data=b'exdf-tools', shape=(1,)) if not self.with_origin(): @@ -176,6 +181,9 @@ class SourceDataWriter: instrument_src = f.create_instrument_source(source) instrument_src.create_index(**index_group_counts) + for sd in legacy_sources: + f.create_legacy_source(sd.source, sd.canonical_name) + def write_control(self, f, sources): """Write CONTROL and RUN data. @@ -351,11 +359,15 @@ def get_index_root_data(sources): # Collect train IDs for this sequence. train_ids = np.zeros(0, dtype=np.uint64) for sd in sources: + if sd.is_legacy: + continue train_ids = np.union1d(train_ids, sd.train_ids) # Collect input files by index keys (source / index_group). 
def get_legacy_sources(sources):
    """Pick out the legacy sources and list their channel names.

    Args:
        sources: Iterable of source data objects. Each must provide
            the attributes is_legacy, is_control, source and
            index_groups.

    Returns:
        (list, list of str): The legacy source objects in input order,
            and the channel names built from them. A source whose
            name ends in ':xtdf' contributes the single channel
            '<source>/image'; any other source contributes one
            '<source>/<index group>' entry per index group.

    Raises:
        ValueError: If a legacy source is also a control source.
    """
    selected = []
    channel_names = []

    for entry in sources:
        if entry.is_legacy:
            if entry.is_control:
                raise ValueError(
                    "Legacy source name is not supported for CONTROL data")

            selected.append(entry)

            if entry.source.endswith(":xtdf"):
                # Only the image channel is listed for these sources.
                channel_names.append(entry.source + "/image")
            else:
                channel_names += [
                    f"{entry.source}/{group}"
                    for group in entry.index_groups
                ]

    return selected, channel_names