diff --git a/src/cal_tools/files.py b/src/cal_tools/files.py
index 21fb85520e1d337ad7db7b9b7a26739b93d95e62..9d18d72de4b12fc8d20a88eff7d922854c24bc88 100644
--- a/src/cal_tools/files.py
+++ b/src/cal_tools/files.py
@@ -157,7 +157,7 @@ class DataFile(h5py.File):
         return self
 
     def create_index(self, train_ids, timestamps=None, flags=None,
-                     origins=None):
+                     origins=None, from_file=None):
         """Create global INDEX datasets.
 
         These datasets are agnostic of any source and describe the
@@ -171,11 +171,30 @@ class DataFile(h5py.File):
                 initial origin of each train, 1 if omitted.
             origins (array_like, optional): Which source is the initial
                 origin of each train, -1 (time server) if omitted.
+            from_file (str, Path or extra_data.FileAccess, optional):
+                Existing data file; its timestamps, flags and origins
+                (where present) replace those arguments for these trains.
 
         Returns:
             None
         """
 
+        if from_file is not None:
+            from extra_data import FileAccess
+
+            if not isinstance(from_file, FileAccess):
+                from_file = FileAccess(from_file)
+
+            sel_trains = np.isin(from_file.train_ids, train_ids)
+
+            if 'INDEX/timestamp' in from_file.file:
+                timestamps = from_file.file['INDEX/timestamp'][sel_trains]
+
+            flags = from_file.validity_flag[sel_trains]
+
+            if 'INDEX/origin' in from_file.file:
+                origins = from_file.file['INDEX/origin'][sel_trains]
+
         self.create_dataset('INDEX/trainId', data=train_ids, dtype=np.uint64)
 
         if timestamps is None: