diff --git a/src/cal_tools/files.py b/src/cal_tools/files.py index 21fb85520e1d337ad7db7b9b7a26739b93d95e62..9d18d72de4b12fc8d20a88eff7d922854c24bc88 100644 --- a/src/cal_tools/files.py +++ b/src/cal_tools/files.py @@ -157,7 +157,7 @@ class DataFile(h5py.File): return self def create_index(self, train_ids, timestamps=None, flags=None, - origins=None): + origins=None, from_file=None): """Create global INDEX datasets. These datasets are agnostic of any source and describe the @@ -171,11 +171,30 @@ class DataFile(h5py.File): initial origin of each train, 1 if omitted. origins (array_like, optional): Which source is the initial origin of each train, -1 (time server) if omitted. + from_file (str, Path or extra_data.FileAccess, optional): + Existing data file to take timestamps, flags and origins + information from if present. Returns: None """ + if from_file is not None: + from extra_data import FileAccess + + if not isinstance(from_file, FileAccess): + from_file = FileAccess(from_file) + + sel_trains = np.isin(from_file.train_ids, train_ids) + + if 'INDEX/timestamp' in from_file.file: + timestamps = from_file.file['INDEX/timestamp'][sel_trains] + + flags = from_file.validity_flag[sel_trains] + + if 'INDEX/origin' in from_file.file: + origins = from_file.file['INDEX/origin'][sel_trains] + self.create_dataset('INDEX/trainId', data=train_ids, dtype=np.uint64) if timestamps is None: