diff --git a/notebooks/REMI/REMI_Digitize_and_Transform.ipynb b/notebooks/REMI/REMI_Digitize_and_Transform.ipynb index 022f851dbf4260fb0067924ec05bfe83e4db1ef5..55515eb7185ecd3666b5ef6b440b53e9685064be 100644 --- a/notebooks/REMI/REMI_Digitize_and_Transform.ipynb +++ b/notebooks/REMI/REMI_Digitize_and_Transform.ipynb @@ -46,9 +46,17 @@ "ignore_ppl = False # Ignore any PPL entries in the PPT.\n", "ppl_offset = 0 # In units of the PPT.\n", "laser_ppt_mask = -1 # Bit mask for used laser, negative to auto-detect from instrument. \n", - "instrument_sase = 3\n", - "first_pulse_offset = 1000\n", - "single_pulse_length = 25000\n", + "instrument_sase = 3 # Which SASE we're running at for PPT decoding.\n", + "first_pulse_offset = 10000 # Sample position where the first pulse begins, ignored when PPT is reconstructed.\n", + "single_pulse_length = 25000 # How many samples if there's only one pulse.\n", + "pulse_start_offset = 0 # Signal offset at the start of each pulse.\n", + "pulse_end_offset = 0 # Signal offset at the end of each pulse.\n", + "\n", + "# PPT reconstruction parameters.\n", + "reconstruct_ppt = False # Reconstruct PPT from some trigger edges.\n", + "trigger_edge_channel = '4_D' # Channel to use for triggering.\n", + "trigger_edge_offset = 0 # Offset to apply to the first trigger edge position to compute first pulse offset.\n", + "fake_ppt_offset = 0 # Offset in reconstructed PPT for pulses.\n", "\n", "# Parallelization parameters.\n", "mp_find_triggers = 0.5 # Parallelization for finding triggers.\n", @@ -108,7 +116,8 @@ "remi = Analysis(calib_config_path, use_hex=not quad_anode)\n", "\n", "with timing('open_run'):\n", - " dc = remi.prepare_dc(RunDirectory(Path(in_folder) / f'r{run:04d}', inc_suspect_trains=True))" + " dc = remi.prepare_dc(RunDirectory(Path(in_folder) / f'r{run:04d}', inc_suspect_trains=True),\n", + " require_ppt=not reconstruct_ppt)" ] }, { @@ -142,6 +151,98 @@ "# Pulse and trigger information" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read PPT from file or reconstruct PPT for older data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if reconstruct_ppt:\n", + " # Take up to the first hundred trains for now.\n", + " # Could be done for each train individually, but likely not necessary for now.\n", + " trigger_trace = dc[remi['digitizer']['source'], remi['digitizer']['key_pattern'].format(trigger_edge_channel)] \\\n", + " [:100].ndarray().mean(axis=0).astype(np.float64)\n", + " trigger_trace -= trigger_trace[0] # Use simple offset correction.\n", + "\n", + " fake_ppt = np.zeros(2700, dtype=np.uint32)\n", + " \n", + " discr_func, discr_params = remi.get_discriminator([trigger_edge_channel])\n", + "\n", + " edges = np.zeros(1000, dtype=np.float64)\n", + " num_pulses = discr_func(trigger_trace, edges, **discr_params[0])\n", + " edges = edges[:num_pulses]\n", + "\n", + " first_edge = edges[0]\n", + " rel_edges = np.round(edges - first_edge)\n", + " edge_diff = rel_edges[1] - rel_edges[0]\n", + "\n", + " if not np.allclose(rel_edges[1:] - rel_edges[:-1], edge_diff):\n", + " raise ValueError('PPT reconstruction for unstable edge intervals not supported')\n", + "\n", + " pulse_spacing = edge_diff / (2 * remi['digitizer']['clock_factor']) # In units of PPT\n", + "\n", + " if not float.is_integer(pulse_spacing):\n", + " raise ValueError('PPT reconstruction encountered non-integer pulse spacing')\n", + "\n", + " pulse_spacing = int(pulse_spacing)\n", + "\n", + " # Taken from euxfel_bunch_pattern/__init__.py\n", + " from euxfel_bunch_pattern import DESTINATION_T4D, DESTINATION_T5D, PHOTON_LINE_DEFLECTION\n", + " if instrument_sase == 1:\n", + " flag = DESTINATION_T4D\n", + " elif instrument_sase == 2:\n", + " flag = DESTINATION_T5D\n", + " elif instrument_sase == 3:\n", + " flag = DESTINATION_T4D | PHOTON_LINE_DEFLECTION\n", + "\n", + " first_pulse_offset = int(first_edge + trigger_edge_offset) # Overwrite notebook argument.\n", + " fake_ppt[fake_ppt_offset:fake_ppt_offset + (pulse_spacing * num_pulses):pulse_spacing] = flag\n", + "\n", + " from pasha.functor import Functor, gen_split_slices\n", + " class FakeKeyDataFunctor(Functor):\n", + " \"\"\"Functor appearing KeyData-like with constant data.\n", + " \n", + " This functor serves a constant data row for a given number\n", + " of train IDs the same way a KeyData object would.\n", + " \"\"\"\n", + " \n", + " def __init__(self, row, train_ids):\n", + " self.row = row\n", + " self.train_ids = train_ids\n", + " \n", + " def split(self, num_workers):\n", + " return gen_split_slices(len(self.train_ids), n_parts=num_workers)\n", + "\n", + " def iterate(self, share):\n", + " it = zip(range(*share.indices(len(self.train_ids))), self.train_ids)\n", + "\n", + " for index, train_id in it:\n", + " yield index, train_id, self.row\n", + " \n", + " ppt_data = FakeKeyDataFunctor(fake_ppt, dc.train_ids)\n", + " \n", + " fig, ax = plt.subplots(num=99, figsize=(9, 6), clear=True, ncols=1, nrows=1)\n", + "\n", + " ax.set_title('Edge trigger signal')\n", + " ax.plot(trigger_trace, lw=1, label=f'Mean {trigger_edge_channel} trace')\n", + " ax.vlines(edges, trigger_trace.min()*1.1, trigger_trace.max()*1.1,\n", + " color='red', linewidth=3, alpha=0.3, label='Edge positions')\n", + " \n", + " ax.set_xlabel('Samples')\n", + " ax.set_ylabel('Intensity / ADU')\n", + " ax.legend()\n", + " \n", + "else:\n", + " ppt_data = dc[ppt_source, 'data.bunchPatternTable']" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -161,8 +262,6 @@ "# * `pulse_offsets [int32: len(dc.train_ids)]` containing the global offset for the first pulse of each train.\n", "# * `num_pulses = pulse_counts.sum(axis=0)`\n", "\n", - "ppt_data = dc[ppt_source, 'data.bunchPatternTable']\n", - "\n", "def get_pulse_positions(ppt, sase, laser, ppl_offset):\n", " # Combine FEL and PPL positions.\n", "\n", @@ -290,8 +389,8 @@ " pulse_count = pulse_counts[index]\n", " \n", " train_triggers = triggers[pulse_offset:pulse_offset+pulse_count]\n", - " train_triggers['start'] = start_int\n", - " train_triggers['stop'] = start_int + int(pulse_len * 2 * clock_factor) - 1\n", + " train_triggers['start'] = start_int + pulse_start_offset\n", + " train_triggers['stop'] = start_int + int(pulse_len * 2 * clock_factor) - 1 + pulse_end_offset\n", " train_triggers['offset'] = start_frac - start_int\n", " train_triggers['pulse'] = all_pos.astype(np.int16)\n", " train_triggers['fel'] = [pos in fel_pos for pos in all_pos]\n", @@ -911,9 +1010,36 @@ "metadata": {}, "outputs": [], "source": [ + "# Try to figure out proposal number from in_folder to work with older files.\n", + "m = re.match(r'p(\\d{6})', Path(in_folder).parts[-2])\n", + "if not proposal and m is not None:\n", + " proposal = int(m[1])\n", + "\n", "seq_len = out_seq_len if out_seq_len > 0 else len(dc.files[0].train_ids)\n", "dataset_kwargs = {k[8:]: v for k, v in locals().items() if k.startswith('dataset_compression')}\n", "\n", + "control_sources = [det_device_id.format(karabo_id=karabo_id, det_name=det_name.upper())\n", + " for det_name in remi['detector']]\n", + "\n", + "channels = []\n", + "if save_raw_triggers or save_raw_edges:\n", + " channels.append('raw')\n", + "if save_rec_signals or save_rec_hits:\n", + " channels.append('rec')\n", + " \n", + "instrument_channels = [\n", + " f'{device_id}:{det_output_key}/{channel}'\n", + " for device_id in control_sources\n", + " for channel in channels\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "Path(out_folder).mkdir(parents=True, exist_ok=True)\n", "print('Writing sequence files', flush=True, end='')\n", "\n", @@ -924,6 +1050,8 @@ " seq_train_ids = dc.train_ids[train_mask]\n", "\n", " with DataFile.from_details(out_folder, out_aggregator, run, seq_id) as outp:\n", + " outp.create_metadata(like=dc, proposal=proposal, run=run, sequence=seq_id,\n", + " control_sources=control_sources, instrument_channels=instrument_channels)\n", " outp.create_index(seq_train_ids)\n", " \n", " for det_name in remi['detector']:\n", @@ -953,8 +1081,6 @@ " chunks=tuple(chunks_hits), **dataset_kwargs)\n", " \n", " cur_fast_data.create_index(raw=pulse_counts[train_mask], rec=pulse_counts[train_mask])\n", - " \n", - " outp.create_metadata(like=dc)\n", " \n", " print('.', flush=True, end='')\n", " \n",