diff --git a/notebooks/REMI/REMI_Digitize_and_Transform.ipynb b/notebooks/REMI/REMI_Digitize_and_Transform.ipynb
index ddb1c0c17976f3aaa87928d0aa2d34728949bbd1..fc1ac99babe5143276436c865e383dc0b67237b4 100644
--- a/notebooks/REMI/REMI_Digitize_and_Transform.ipynb
+++ b/notebooks/REMI/REMI_Digitize_and_Transform.ipynb
@@ -412,32 +412,59 @@
 "    train_triggers['fel'] = [pos in fel_pos for pos in all_pos]\n",
 "    train_triggers['ppl'] = [pos in ppl_pos for pos in all_pos]\n",
 "\n",
- "with timing('find_triggers'):\n",
- "    psh.map(trigger_by_ppt, ppt_data)\n",
 "    \n",
- "if (np.unique(triggers['pulse'][1:] - triggers['pulse'][:-1]) > 0).sum() > 1:\n",
- "    # There is more than one delta between pulse entries across all pulses. This is not\n",
- "    # necessarily a problem, as the pattern could simply have changed in between trains\n",
- "    # with each train being split properly.\n",
- "    # If there's more than one delta in a single train, this likely points to a mismatch\n",
- "    # of FEL and PPL repetition rate. This is most likely not intended.\n",
+ "if ignore_fel and ignore_ppl:\n",
+ "    # Both FEL and PPL are ignored, use a virtual full-train trigger.\n",
+ "    print('WARNING: Both FEL and PPL pulses are ignored, '\n",
+ "          'a virtual trigger covering the entire train is inserted')\n",
+ "    \n",
- "    one = np.uint64(1) # Because np.uint64 + int = np.float64\n",
- "    pulse_deltas = set()\n",
- "\n",
- "    for pulse_id, (offset, count) in enumerate(zip(pulse_offsets, pulse_counts)):\n",
- "        deltas = triggers['pulse'][offset+one:offset+count] - triggers['pulse'][offset:offset+count-one]\n",
- "\n",
- "        if len(np.unique(deltas)) > 1:\n",
- "            for delta in deltas:\n",
- "                pulse_deltas.add(delta)\n",
- "\n",
- "    if len(pulse_deltas) > 1:\n",
- "        delta_str = ', '.join([str(x) for x in sorted(pulse_deltas)])\n",
- "        warning(f'Different pulse lengths (PPT: {delta_str}) encountered within single trains, '\n",
- "                f'separated pulse spectra may split up signals!')\n",
- "    else:\n",
- "        warning('Different pulse lengths encountered across trains, separation may be unstable!')"
+ "    # Overwrite the global pulse statistics computed before.\n",
+ "    num_pulses = len(dc.train_ids)\n",
+ "    pulse_counts[:] = 1\n",
+ "    pulse_counts = pulse_counts.astype(np.int32)\n",
+ "    pulse_offsets = np.arange(len(pulse_counts)).astype(np.int32)\n",
+ "\n",
+ "    # Obtain minimal trace length.\n",
+ "    min_trace_len = min([\n",
+ "        dc[src, key].entry_shape[0]\n",
+ "        for det_name in remi['detector'].keys()\n",
+ "        for src, key in remi.get_detector_sourcekeys(det_name)\n",
+ "    ])\n",
+ "\n",
+ "    triggers['start'] = first_pulse_offset\n",
+ "    triggers['stop'] = min_trace_len\n",
+ "    triggers['offset'] = 0.0\n",
+ "    triggers['pulse'] = -1\n",
+ "    triggers['fel'] = False\n",
+ "    triggers['ppl'] = False\n",
+ "    \n",
+ "else:\n",
+ "    with timing('find_triggers'):\n",
+ "        psh.map(trigger_by_ppt, ppt_data)\n",
+ "    \n",
+ "    if (np.unique(triggers['pulse'][1:] - triggers['pulse'][:-1]) > 0).sum() > 1:\n",
+ "        # There is more than one delta between pulse entries across all pulses. This is not\n",
+ "        # necessarily a problem, as the pattern could simply have changed in between trains\n",
+ "        # with each train being split properly.\n",
+ "        # If there's more than one delta in a single train, this likely points to a mismatch\n",
+ "        # of FEL and PPL repetition rate. This is most likely not intended.\n",
+ "\n",
+ "        one = np.uint64(1) # Because np.uint64 + int = np.float64\n",
+ "        pulse_deltas = set()\n",
+ "\n",
+ "        for pulse_id, (offset, count) in enumerate(zip(pulse_offsets, pulse_counts)):\n",
+ "            deltas = triggers['pulse'][offset+one:offset+count] - triggers['pulse'][offset:offset+count-one]\n",
+ "\n",
+ "            if len(np.unique(deltas)) > 1:\n",
+ "                for delta in deltas:\n",
+ "                    pulse_deltas.add(delta)\n",
+ "\n",
+ "        if len(pulse_deltas) > 1:\n",
+ "            delta_str = ', '.join([str(x) for x in sorted(pulse_deltas)])\n",
+ "            warning(f'Different pulse lengths (PPT: {delta_str}) encountered within single trains, '\n",
+ "                    f'separated pulse spectra may split up signals!')\n",
+ "        else:\n",
+ "            warning('Different pulse lengths encountered across trains, separation may be unstable!')"
   ]
  },
  {
@@ -878,7 +905,7 @@
 "max_num_hits = 0.0\n",
 " \n",
 "for det_name in remi['detector'].keys():\n",
- "    agg_window = num_pulses // 1000\n",
+ "    agg_window = num_pulses // min(1000, num_pulses)\n",
 "    \n",
 "    num_hits = np.isfinite(det_data[det_name]['hits']['x']).sum(axis=1)\n",
 "    num_hits = num_hits[:(len(num_hits) // agg_window) * agg_window]\n",
@@ -1127,6 +1154,7 @@
 "    \n",
 "    for det_name in remi['detector']:\n",
 "        cur_device_id = det_device_id.format(karabo_id=karabo_id, det_name=det_name.upper())\n",
+ "        cur_max_hits = remi['detector'][det_name]['max_hits']\n",
 "        \n",
 "        cur_control_data = outp.create_control_source(cur_device_id)\n",
 "        # Manually manipulate the file here, still creates the index properly.\n",
@@ -1139,23 +1167,36 @@
 "        \n",
 "        if save_raw_triggers:\n",
 "            cur_fast_data.create_key('raw.triggers', triggers[pulse_mask],\n",
+ "                                     maxshape=(None,) + triggers.shape[1:],\n",
 "                                     chunks=tuple(chunks_triggers), **dataset_kwargs)\n",
 "        \n",
 "        if save_raw_edges:\n",
 "            cur_fast_data.create_key('raw.edges', cur_data['edges'][pulse_mask],\n",
- "                                     chunks=tuple(chunks_edges), **dataset_kwargs)\n",
+ "                                     maxshape=(None,) + cur_data['edges'].shape[1:],\n",
+ "                                     chunks=tuple(chunks_edges if chunks_edges[-1] <= cur_max_hits\n",
+ "                                                  else chunks_edges[:-1] + [cur_max_hits]),\n",
+ "                                     **dataset_kwargs)\n",
 "        \n",
 "        if save_raw_amplitudes:\n",
 "            cur_fast_data.create_key('raw.amplitudes', cur_data['amplitudes'][pulse_mask],\n",
- "                                     chunks=tuple(chunks_amplitudes), **dataset_kwargs)\n",
+ "                                     maxshape=(None,) + cur_data['amplitudes'].shape[1:],\n",
+ "                                     chunks=tuple(chunks_amplitudes if chunks_amplitudes[-1] <= cur_max_hits\n",
+ "                                                  else chunks_amplitudes[:-1] + [cur_max_hits]),\n",
+ "                                     **dataset_kwargs)\n",
 "        \n",
 "        if save_rec_signals:\n",
 "            cur_fast_data.create_key('rec.signals', cur_data['signals'][pulse_mask],\n",
- "                                     chunks=tuple(chunks_signals), **dataset_kwargs)\n",
+ "                                     maxshape=(None,) + cur_data['signals'].shape[1:],\n",
+ "                                     chunks=tuple(chunks_signals if chunks_signals[-1] <= cur_max_hits\n",
+ "                                                  else chunks_signals[:-1] + [cur_max_hits]),\n",
+ "                                     **dataset_kwargs)\n",
 "        \n",
 "        if save_rec_hits:\n",
 "            cur_fast_data.create_key('rec.hits', cur_data['hits'][pulse_mask],\n",
- "                                     chunks=tuple(chunks_hits), **dataset_kwargs)\n",
+ "                                     maxshape=(None,) + cur_data['hits'].shape[1:],\n",
+ "                                     chunks=tuple(chunks_hits if chunks_hits[-1] <= cur_max_hits\n",
+ "                                                  else chunks_hits[:-1] + [cur_max_hits]),\n",
+ "                                     **dataset_kwargs)\n",
 "        \n",
 "        cur_fast_data.create_index(raw=pulse_counts[train_mask], rec=pulse_counts[train_mask])\n",
 " \n",
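
A note on `one = np.uint64(1)` in the first hunk: with NumPy's value-based casting (NumPy < 2.0), adding a plain Python int to a `np.uint64` promotes the result to `np.float64`, which can no longer be used in slice arithmetic. A minimal sketch of the promotion; under NumPy >= 2.0 (NEP 50) the first result stays `uint64` instead:

    import numpy as np

    offset = np.uint64(10)
    # uint64 + Python int -> float64 under NumPy < 2.0 value-based casting,
    # so the sum could not be used as a slice index.
    print((offset + 1).dtype)
    # uint64 + uint64 stays integral, hence the explicit `one` constant.
    print((offset + np.uint64(1)).dtype)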
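
The `agg_window` change in the second hunk guards short runs: with fewer than 1000 pulses, the old `num_pulses // 1000` floors to zero and the subsequent `len(num_hits) // agg_window` raises `ZeroDivisionError`. The new form yields a window of at least one pulse:

    num_pulses = 700
    agg_window = num_pulses // min(1000, num_pulses)  # 1 instead of 0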
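
The four `create_key` calls in the last hunk repeat the same chunk-capping conditional. A sketch of the pattern as a helper (the name `cap_chunks` is hypothetical; the notebook inlines the expression each time):

    def cap_chunks(chunks, max_hits):
        # Clamp the trailing chunk dimension to the detector's configured
        # max_hits, so a chunk never exceeds the dataset's hit axis.
        chunks = list(chunks)
        if chunks[-1] > max_hits:
            chunks[-1] = max_hits
        return tuple(chunks)

    # e.g. cap_chunks([32, 500], max_hits=100) == (32, 100)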
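
On the added `maxshape` arguments: in h5py, a `None` entry in `maxshape` makes that axis resizable, and resizable datasets must be chunked. Assuming `create_key` forwards these keyword arguments to the underlying h5py dataset creation, the plain-h5py equivalent looks roughly like this (file and dataset names are illustrative):

    import h5py
    import numpy as np

    hits = np.zeros((100, 50))  # e.g. one row per pulse

    with h5py.File('example.h5', 'w') as f:
        # None marks the first axis as unlimited, so more pulses can be
        # appended later; chunking is mandatory for resizable datasets.
        ds = f.create_dataset('rec/hits', data=hits,
                              maxshape=(None,) + hits.shape[1:],
                              chunks=(32, 50))
        ds.resize(150, axis=0)  # grow along the unlimited axis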