diff --git a/bin/slurm_finalize.sh b/bin/slurm_finalize.sh index 9eaf9a73581d5861f3fba2005324de0542e1a4f0..461d7cdd1d0112da2329efbff12fbbffdfce1bad 100755 --- a/bin/slurm_finalize.sh +++ b/bin/slurm_finalize.sh @@ -6,6 +6,7 @@ set -euo pipefail python_path=$1 temp_dir=$2 finalize_script=$3 +report_to=$4 echo "Running with the following parameters:" echo "Python path: $python_path" @@ -23,9 +24,11 @@ export MPLBACKEND=AGG # Ensure Python uses UTF-8 for files by default export LANG=en_US.UTF-8 -shopt -s failglob # Fail fast if there are no notebooks found -echo "Converting notebooks" -${python_path} -m nbconvert --to rst --TemplateExporter.exclude_input=True "$temp_dir"/*.ipynb -shopt -u failglob # Restore default glob behaviour +if [ -n "$report_to" ]; then + shopt -s failglob # Fail fast if there are no notebooks found + echo "Converting notebooks" + ${python_path} -m nbconvert --to rst --TemplateExporter.exclude_input=True "$temp_dir"/*.ipynb + shopt -u failglob # Restore default glob behaviour +fi ${python_path} "$finalize_script" diff --git a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb index 613b264ad2f2fe0541ac49b543c5f2f33b5e80a3..30b1b8320226df835b870adf6eadbcaf7ca1b6d5 100644 --- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb +++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb @@ -17,21 +17,22 @@ "metadata": {}, "outputs": [], "source": [ - "in_folder = \"/gpfs/exfel/exp/HED/202031/p900174/raw\" # the folder to read data from, required\n", - "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/hibef_agipd2\" # the folder to output to, required\n", + "in_folder = \"/gpfs/exfel/exp/SPB/202131/p900230/raw\" # the folder to read data from, required\n", + "out_folder = \"/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p900230\" # the folder to output to, required\n", "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n", "modules = [-1] # modules to correct, set to -1 for all, range allowed\n", - "run = 155 # runs to process, required\n", + "train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed\n", + "run = 275 # runs to process, required\n", "\n", - "karabo_id = \"HED_DET_AGIPD500K2G\" # karabo karabo_id\n", + "karabo_id = \"SPB_DET_AGIPD1M-1\" # karabo karabo_id\n", "karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators\n", "receiver_id = \"{}CH0\" # inset for receiver devices\n", "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n", "h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", "h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information\n", - "karabo_id_control = \"HED_EXP_AGIPD500K2G\" # karabo-id for control device\n", - "karabo_da_control = 'AGIPD500K2G00' # karabo DA for control infromation\n", + "karabo_id_control = \"SPB_IRU_AGIPD1M1\" # karabo-id for control device\n", + "karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation\n", "\n", "slopes_ff_from_files = \"\" # Path to locally stored SlopesFF and BadPixelsFF constants\n", "\n", @@ -40,6 +41,12 @@ "cal_db_timeout = 30000 # in milli seconds\n", "creation_date_offset = \"00:00:00\" # add an offset to creation date, e.g. 
to get different constants\n", "\n", + "use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable\n", + "ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train\n", + "\n", + "use_litframe_device = '' # Device ID for a lit frame finder device to only process illuminated frames, empty string to disable\n", + "energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled\n", + "\n", "max_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", "bias_voltage = 300 # Bias voltage\n", "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n", @@ -48,7 +55,6 @@ "overwrite = True # set to True if existing data should be overwritten\n", "max_pulses = [0, 352, 1] # range list [st, end, step] of memory cell indices to be processed within a train. 3 allowed maximum list input elements.\n", "mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries\n", - "cell_id_preview = 1 # cell Id used for preview in single-shot plots\n", "integration_time = -1 # integration time, negative values for auto-detection.\n", "\n", "# Correction parameters\n", @@ -82,6 +88,10 @@ "mask_zero_std = False # Mask pixels with zero standard deviation across train\n", "low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain\n", "\n", + "# Plotting parameters\n", + "skip_plots = False # exit after writing corrected files and metadata\n", + "cell_id_preview = 1 # cell Id used for preview in single-shot plots\n", + "\n", "# Paralellization parameters\n", "chunk_size = 1000 # Size of chunk for image-weise correction\n", "chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.\n", @@ -120,7 +130,7 @@ "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import yaml\n", - "from extra_data import RunDirectory, stack_detector_data\n", + "from extra_data import RunDirectory, stack_detector_data, by_id\n", "from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry\n", "from matplotlib import cm as colormap\n", "from matplotlib.colors import LogNorm\n", @@ -139,6 +149,8 @@ "from cal_tools import agipdalgs as calgs\n", "from cal_tools.agipdlib import (\n", " AgipdCorrections,\n", + " CellRange,\n", + " LitFrameSelection,\n", " get_acq_rate,\n", " get_gain_mode,\n", " get_integration_time,\n", @@ -224,7 +236,7 @@ "metadata": {}, "outputs": [], "source": [ - "if sequences[0] == -1:\n", + "if sequences == [-1]:\n", " sequences = None\n", "\n", "control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n", @@ -275,6 +287,54 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [ + "if use_ppu_device:\n", + " # Obtain trains to process if using a pulse picker device.\n", + " dc = RunDirectory(in_folder / f'r{run:04d}')\n", + "\n", + " # Will throw an uncaught exception if the device is wrong.\n", + " seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()\n", + "\n", + " # The trains picked are the unique values of trainTrigger.sequenceStart\n", + " # minus the first (previous trigger before this run).\n", + " train_ids = np.unique(seq_start)[1:] + ppu_train_offset\n", + "\n", + " print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')\n", + " \n", + " # Since we got the DataCollection already, 
narrow down the files we open.\n", + " # This hardcodes the receiver_id and path_template parameters currently, but this\n", + " # will disappear with moving the entire notebook to EXtra-data.\n", + " subdc = dc.select_trains(by_id[train_ids]).select(f'{karabo_id}/DET/*CH0:xtdf')\n", + " subseq = {int(f.filename[-8:-3]) for f in subdc.files}\n", + " \n", + " if sequences is None:\n", + " # All sequences were meant to be processed by this job, so take the entire\n", + " # subset of sequences.\n", + " sequences = sorted(subseq)\n", + " else:\n", + " # If explicit sequences were specified (e.g. due to job balancing by xfel-calibrate)\n", + " # only work on the intersection between that and what the PPU device offers.\n", + " sequences = sorted(set(sequences) & subseq)\n", + "\n", + "elif train_ids != [-1]:\n", + " # Specific trains passed by parameter, convert to ndarray.\n", + " train_ids = np.array(train_ids)\n", + " \n", + " print(f'Processing up to {len(train_ids)} manually selected train(s)')\n", + "else:\n", + " # Process all trains.\n", + " train_ids = None\n", + " \n", + " print(f'Processing all valid trains')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], "source": [ "# set everything up filewise\n", "mapped_files, _, total_sequences, _, _ = cal_tools.tools.map_modules_from_folder(\n", @@ -328,6 +388,32 @@ "print(f\"Maximum memory cells to calibrate: {max_cells}\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if use_litframe_device:\n", + " # check run for the AgipdLitFrameFinder device\n", + " try: dc\n", + " except NameError: dc = RunDirectory(in_folder / f'r{run:04d}')\n", + "\n", + " if use_litframe_device + ':output' in dc.instrument_sources:\n", + " # Use selection provided by the AgipdLitFrameFinder (if the device is recorded)\n", + " cell_sel = LitFrameSelection(use_litframe_device, dc, train_ids, max_pulses, energy_threshold)\n", + " train_ids = cell_sel.train_ids\n", + " else:\n", + " # Use range selection (if the device is not recorded)\n", + " print(f\"WARNING: LitFrameFinder {use_litframe_device} device is not found.\")\n", + " cell_sel = CellRange(max_pulses, max_cells=max_cells)\n", + "else:\n", + " # Use range selection\n", + " cell_sel = CellRange(max_pulses, max_cells=max_cells)\n", + "\n", + "print(cell_sel.msg())" + ] + }, { "cell_type": "code", "execution_count": null, @@ -409,12 +495,13 @@ "source": [ "agipd_corr = AgipdCorrections(\n", " max_cells,\n", - " max_pulses,\n", + " cell_sel,\n", " h5_data_path=h5path,\n", " h5_index_path=h5path_idx,\n", " corr_bools=corr_bools,\n", " gain_mode=gain_mode,\n", " comp_threads=os.cpu_count() // n_cores_files,\n", + " train_ids=train_ids\n", ")\n", "\n", "agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold\n", @@ -485,6 +572,8 @@ " return err, mod, when, k_da\n", "\n", "\n", + "print(f'Preparing constants (FF: {agipd_corr.corr_bools.get(\"xray_corr\", False)}, PC: {any(agipd_corr.pc_bools)}, '\n", + " f'BLC: {any(agipd_corr.blc_bools)})')\n", "ts = perf_counter()\n", "with multiprocessing.Pool(processes=len(modules)) as pool:\n", " const_out = pool.map(retrieve_constants, modules)\n", @@ -549,15 +638,23 @@ "metadata": {}, "outputs": [], "source": [ + "step_timer.start()\n", + "\n", "with multiprocessing.Pool() as pool:\n", + " step_timer.done_step('Started pool')\n", + " \n", " for file_batch in batches(file_list, n_cores_files):\n", " # TODO: Move some printed output to 
logging or similar\n", " print(f\"Processing next {len(file_batch)} files\")\n", - " step_timer.start()\n", " img_counts = pool.starmap(agipd_corr.read_file, zip(range(len(file_batch)), file_batch,\n", " [not common_mode]*len(file_batch)))\n", " step_timer.done_step(f'Loading data from files')\n", "\n", + " if img_counts == 0:\n", + " # Skip any further processing and output if there are no images to\n", + " # correct in this file.\n", + " continue\n", + "\n", " if mask_zero_std:\n", " # Evaluate zero-data-std mask\n", " pool.starmap(agipd_corr.mask_zero_std, itertools.product(\n", @@ -660,6 +757,18 @@ " yaml.safe_dump({\"time-summary\": {f\"S{seq}\": timestamps}}, fd)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if skip_plots:\n", + " print('Skipping plots')\n", + " import sys\n", + " sys.exit(0)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1138,9 +1247,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.8.11" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb b/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb index afe6f30820740bfc343bc95fb834ba5fd855c128..53ecb323c0e52497a23b0b0ad003f4d5a833c081 100644 --- a/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb +++ b/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb @@ -146,7 +146,7 @@ "source": [ "control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n", "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n", - "\n", + "slow_paths = (control_fn, karabo_id_control)\n", "if gain_setting == 0.1:\n", " if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):\n", " print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n", @@ -237,7 +237,8 @@ " raise ValueError(f\"No raw images found for {qm} for all sequences\")\n", "\n", " if acq_rate == 0:\n", - " local_acq_rate = agipdlib.get_acq_rate(fast_paths=(f, karabo_id, idx))\n", + " local_acq_rate = agipdlib.get_acq_rate(\n", + " fast_paths=(f, karabo_id, idx), slow_paths=slow_paths)\n", " else:\n", " local_acq_rate = acq_rate\n", "\n", @@ -308,7 +309,7 @@ " const_mdata[\"file-path\"] = const_dict[const_name][:2]\n", " const_mdata[\"creation-time\"] = None\n", "\n", - " return qm, mdata_dict, karabo_da, acq_rate, local_max_cells" + " return qm, mdata_dict, karabo_da, local_acq_rate, local_max_cells" ] }, { diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb index ff66d2a70934726d0febb03f3a9c11f46300cab3..dd69492abc5ebed0e57e2647df5b5ffd8a1e2749 100644 --- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb +++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb @@ -330,15 +330,19 @@ "source": [ "# set everything up filewise\n", "os.makedirs(out_folder, exist_ok=True)\n", - "gain_mapped_files, total_files, total_file_size = map_gain_stages(\n", + "gain_mapped_files, _, total_file_size = map_gain_stages(\n", " in_folder, offset_runs, path_template, karabo_da, sequences\n", ")\n", - "print(f\"Will process a total of {total_files} files ({total_file_size:.02f} GB).\")\n", + "# TODO: Keep this commented out to use it later again, this is false information at the moment.\n", + "# print(f\"Will process a total of {total_files} files ({total_file_size:.02f} GB).\")\n", "\n", - "inp 
= []\n", + "# TODO: Remove all of this nonsense with Extra-data.\n", + "inp = [None] * 3 * len(modules)\n", "for gain_index, (gain, qm_file_map) in enumerate(gain_mapped_files.items()):\n", " gain_input = []\n", " for module_index in modules:\n", + " max_n_imgs = 0\n", + " inp_idx = gain_index * len(modules) + module_index\n", " qm = module_index_to_qm(module_index)\n", " if qm not in qm_file_map:\n", " print(f\"Did not find files for {qm}\")\n", @@ -349,17 +353,18 @@ " # TODO: remove after using EXtra-data to read files\n", " # and skip empty trains.\n", " with h5py.File(filename, \"r\") as fin:\n", - " if fin[h5path.format(module_index)+\"/trainId\"].shape[0] != 0:\n", - " print(f\"Process {filename} for {qm}\")\n", - " gain_input.append((filename, module_index, gain_index))\n", - " else:\n", - " print(f\"Do not process {filename} because it is empty.\")\n", - " if not gain_input:\n", + " n_imgs = fin[h5path.format(module_index)+\"/trainId\"].shape[0]\n", + " if n_imgs != 0 and n_imgs > max_n_imgs:\n", + " inp[inp_idx] = (filename, module_index, gain_index)\n", + " max_n_imgs = n_imgs\n", + " print(f\"Process {inp[inp_idx][0]} for {qm}\")\n", + " if inp[inp_idx] is None:\n", " raise ValueError(\n", " \"No images to process for run: \"\n", " f\"{[v for v in offset_runs.values()][gain_index]}\"\n", " )\n", - " inp += gain_input" + "inp = [x for x in inp if x is not None]\n", + "total_files = len(inp)" ] }, { diff --git a/notebooks/AGIPD/Chracterize_AGIPD_Gain_PC_NBC.ipynb b/notebooks/AGIPD/Chracterize_AGIPD_Gain_PC_NBC.ipynb index a0096568611e77ae11a8822a2b58121d81bc9086..37df92a7cccac843bfe155e3fad432fb37ced4cf 100644 --- a/notebooks/AGIPD/Chracterize_AGIPD_Gain_PC_NBC.ipynb +++ b/notebooks/AGIPD/Chracterize_AGIPD_Gain_PC_NBC.ipynb @@ -178,6 +178,8 @@ "run = runs[0]\n", "bursts_per_file = []\n", "channel = 0\n", + "control_fname = f'{in_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n", + "slow_paths = (control_fname, karabo_id_control)\n", "\n", "for seq in range(seqs):\n", " fname = os.path.join(path_temp.format(run),\n", @@ -185,7 +187,8 @@ " print('Reading ',fname)\n", " \n", " if acq_rate == 0.:\n", - " acq_rate = get_acq_rate((fname, karabo_id, channel))\n", + " acq_rate = get_acq_rate(\n", + " fast_paths=(fname, karabo_id, channel), slow_paths=slow_paths)\n", " print(\"Acquisition rate set from file: {} MHz\".format(acq_rate))\n", "\n", " if mem_cells == 0:\n", @@ -225,8 +228,6 @@ "metadata": {}, "outputs": [], "source": [ - "control_fname = f'{in_folder}/r{runs[0]:04d}/RAW-R{runs[0]:04d}-{karabo_da_control}-S00000.h5'\n", - "\n", "if \"{\" in h5path_ctrl:\n", " h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n", "\n", diff --git a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb index bca077b808b00ff0430aae356c720e404cd60ab0..395d7efaf67d0ca34256c561c8eee2e18fe957bf 100644 --- a/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_Gain_Correct_and_Verify_NBC.ipynb @@ -53,7 +53,7 @@ "bias_voltage = 180 # will be overwritten by value in file\n", "\n", "# TODO: Remove\n", - "db_module = \"\" # ID of module in calibration database, this parameter is ignore in the notebook. TODO: remove from calibration_configurations.\n", + "db_module = [\"\"] # ID of module in calibration database, this parameter is ignore in the notebook. 
TODO: remove from calibration_configurations.\n", "\n", "\n", "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):\n", diff --git a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb index 8391912ab25ec71e16f5c587b3746688da47b4f2..f756d2cb3260ba34ff6cd488940e6a8e04017477 100644 --- a/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_dark_analysis_all_gains_burst_mode_NBC.ipynb @@ -60,7 +60,7 @@ "operation_mode = '' # Detector operation mode, optional\n", "\n", "# TODO: Remove\n", - "db_module = \"\" # ID of module in calibration database. TODO: remove from calibration_configurations." + "db_module = [\"\"] # ID of module in calibration database. TODO: remove from calibration_configurations." ] }, { diff --git a/notebooks/REMI/REMI_Digitize_and_Transform.ipynb b/notebooks/REMI/REMI_Digitize_and_Transform.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8d7ce5a454e0517b27cedbb4b18600f2ad371f7e --- /dev/null +++ b/notebooks/REMI/REMI_Digitize_and_Transform.ipynb @@ -0,0 +1,1015 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Data selection parameters.\n", + "run = 104 # Run ID.\n", + "in_folder = '/gpfs/exfel/exp/SQS/202101/p002535/raw' # Partial input path appended with run ID.\n", + "out_folder = '/gpfs/exfel/exp/SQS/202101/p002535/scratch/cal_test' # Full path to output folder.\n", + "\n", + "calib_config_path = '/gpfs/exfel/exp/SQS/202101/p002535/usr/config_board2+4.yaml' # Path to correction and transform configuration\n", + "\n", + "# These parameters are required by xfel-calibrate but ignored in this notebook.\n", + "cycle = '' # Proposal cycle, currently not used.\n", + "cal_db_timeout = 0 # Calibration DB timeout, currently not used.\n", + "cal_db_interface = 'foo' # Calibration DB interface, currently not used.\n", + "karabo_da = 'bar' # Karabo data aggregator name, currently not used\n", + "\n", + "# Output parameters.\n", + "karabo_id = 'SQS_REMI_DLD6' # Karabo device ID root for virtual output device.\n", + "proposal = '' # Proposal, leave empty for auto detection based on in_folder\n", + "out_filename = 'CORR-R{run:04d}-REMI01-S{sequence:05d}.h5' # Pattern for output filenames.\n", + "out_seq_len = 5000 # Number of trains per sequence file in output.\n", + "det_device_id = '{karabo_id}/DET/{det_name}' # Karabo device ID for virtual output device.\n", + "det_output_key = 'output' # Pipeline name for fast data output.\n", + "save_raw_triggers = True # Whether to save trigger position in files.\n", + "save_raw_edges = True # Whether to save digitized edge positions in files.\n", + "save_rec_signals = True # Whether to save reconstructed signals (u1-w2, mcp) in files.\n", + "save_rec_hits = True # Whether to save reoncstructed hits (x,y,t,m) in files.\n", + "chunks_triggers = [500] # HDF chunk size for triggers.\n", + "chunks_edges = [500, 7, 50] # HDF chunk size for edges.\n", + "chunks_hits = [50, 50] # HDF chunk size for hits.\n", + "chunks_signals = [50, 50] # HDF chunk size for signals.\n", + "dataset_compression = 'gzip' # HDF compression method.\n", + "dataset_compression_opts = 3 # HDF GZIP compression level.\n", + "\n", + "# Parallelization parameters.\n", + "mp_pulse_info = 8 # Parallelization for pulse statistics.\n", + "mp_find_triggers = 0.5 # Parallelization for finding triggers.\n", + 
"mp_find_edges = 0.5 # Parallelization for digitizing analog signal.\n", + "mt_avg_trace = 2 # Parallelization for trace averaging.\n", + "mp_rec_hits = 1.0 # Parallelization for hit reconstruction." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.colors import LogNorm\n", + "from threadpoolctl import threadpool_limits\n", + "\n", + "import re\n", + "import h5py\n", + "from pathlib import Path\n", + "from datetime import datetime\n", + "\n", + "import pasha as psh\n", + "from extra_data import RunDirectory\n", + "from extra_remi import Analysis, trigger_dt\n", + "from extra_remi.util import timing\n", + "from extra_remi.plots import plot_detector_diagnostics\n", + "from extra_remi.rd_resort import signal_dt, hit_dt\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "calib_config_path = Path(calib_config_path)\n", + "\n", + "if not calib_config_path.is_file():\n", + " # If the path cannot be resolved right now, try the same path relative to in_folder.\n", + " calib_config_path = Path(in_folder) / calib_config_path\n", + " \n", + " if not calib_config_path.is_file():\n", + " # Disallow implicit config file creation.\n", + " raise ValueError('calib_config_path not found - neither absolute nor relative to in_folder')\n", + "\n", + "remi = Analysis(calib_config_path)\n", + "\n", + "with timing('open_run'):\n", + " dc = remi.prepare_dc(RunDirectory(Path(in_folder) / f'r{run:04d}', inc_suspect_trains=True))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Transformation parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def print_leaf(leaf, indent=0):\n", + " for key, value in leaf.items():\n", + " if isinstance(value, dict):\n", + " print(indent * 4 * ' ' + key)\n", + " print_leaf(value, indent=indent+1)\n", + " else:\n", + " print(indent * 4 * ' ' + f'{key}: {value}')\n", + " \n", + "print_leaf(remi.tree)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pulse and trigger information" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Count pulses per train and compute offsets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Based on the pulse pattern tables, three global variables are obtained:\n", + "#\n", + "# * `pulse_counts [int32: len(dc.train_ids)]` containing the number of pulses per train.\n", + "# * `pulse_offsets [int32: len(dc.train_ids)]` containing the global offset for the first pulse of each train.\n", + "# * `num_pulses = pulse_counts.sum(axis=0)`\n", + "\n", + "with timing('pulse_info'):\n", + " pulse_counts, pulse_offsets, num_pulses = remi.get_pulse_info(dc, mp_pulse_info)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(num=1, ncols=1, nrows=1, figsize=(9, 4), clear=True)\n", + "\n", + "ax.set_title('Pulse count')\n", + "ax.plot(dc.train_ids, pulse_counts, lw=1)\n", + "ax.set_xlabel('Train ID')\n", + "ax.set_ylabel('Number of pulses')\n", + "ax.set_ylim(0, max(300, pulse_counts.max() + 10))\n", + "ax.ticklabel_format(style='plain')\n", + "pass" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### Find triggers\n", + "\n", + "The trigger defines the boundary of a pulse on the digitizer trace, which is stored per train." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# A trigger defines the boundary of a pulse on the digitizer trace stored per train. This cell creates a\n", + "# global variable:\n", + "# * `triggers [(start: int32, stop: int32, offset: float64): num_pulses]` containing the triggers for each\n", + "# pulse.\n", + "# \n", + "# Triggers may be obtained through two different methods:\n", + "# \n", + "# * `ppt` uses the pulse puttern table to locate the pulse positions on the trace. Only number of pulses and\n", + "# their distance can be drawn this way, leaving the absolute offset for the very first pulse to be\n", + "# configured via `trigger/ppt/first_pulse_offset`.\n", + "# \n", + "# * `edge` uses the digitizer channel `trigger/edge/channel` and builds triggers around the edges found on it.\n", + "# The boundaries relative to this edge may be configured with the `group_start`, `group_end` and `dead_time`\n", + "# parameters.\n", + "\n", + "psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_triggers))\n", + "\n", + "triggers = psh.alloc(shape=(num_pulses,), dtype=trigger_dt, fill=(-1, -1, np.nan))\n", + "\n", + "if remi['trigger']['method'] == 'ppt':\n", + " from euxfel_bunch_pattern import indices_at_sase\n", + " \n", + " pptc = remi['trigger']['ppt']\n", + " \n", + " keydata = dc[remi.get_ppt_sourcekey()]\n", + " sase = remi['instrument']['timeserver']['sase']\n", + " first_pulse_offset = pptc['first_pulse_offset']\n", + " single_pulse_length = pptc['single_pulse_length']\n", + " clock_factor = remi['digitizer']['clock_factor']\n", + " \n", + " def trigger_by_ppt(worker_id, index, train_id, ppt):\n", + " abs_pos = indices_at_sase(ppt, sase)\n", + " num_pulses = len(abs_pos)\n", + " \n", + " if num_pulses > 1:\n", + " rel_pos = (abs_pos - abs_pos[0])\n", + " pulse_len = rel_pos[1] - rel_pos[0]\n", + " elif num_pulses == 1:\n", + " rel_pos = np.zeros(1)\n", + " pulse_len = single_pulse_length\n", + " elif num_pulses == 0:\n", + " return\n", + " \n", + " pulse_offset = pulse_offsets[index]\n", + " pulse_count = pulse_counts[index]\n", + " train_triggers = triggers[pulse_offset:pulse_offset+pulse_count]\n", + " \n", + " start_frac = first_pulse_offset + rel_pos * 2 * clock_factor\n", + " start = start_frac.astype(int)\n", + " \n", + " if start.shape != train_triggers.shape:\n", + " print(f'pulse number mismatch in train {index} / {train_id}, SKIPPING')\n", + " return\n", + " \n", + " train_triggers['start'] = start\n", + " train_triggers['stop'] = start + int(pulse_len * 2 * clock_factor) - 1\n", + " train_triggers['offset'] = start_frac - start\n", + " \n", + " with timing('find_triggers'):\n", + " psh.map(trigger_by_ppt, keydata)\n", + " \n", + "elif remi['trigger']['method'] == 'edge':\n", + " edgec = remi['trigger']['edge']\n", + " keydata = dc[remi.get_channel_sourcekey(edgec['channel'])]\n", + "\n", + " trace_len = keydata.entry_shape[0]\n", + " group_start = edgec['group_start']\n", + " group_end = edgec['group_end']\n", + " dead_time = edgec['dead_time']\n", + " \n", + " def prepare_trigger_edge_worker(worker_id):\n", + " correct_func = remi.get_baseline_corrector()\n", + " discr_func, discr_params = remi.get_discriminator([edgec['channel']])\n", + "\n", + " bl_start, bl_stop, _ = remi.get_baseline_limits(trace_len)\n", + " bl_sym = 
remi['digitizer']['baseline_symmetry']\n", + " \n", + " edge_pos = np.empty(10000, dtype=np.float64)\n", + " trace_corr = np.empty(trace_len, dtype=np.float64)\n", + " baselines = np.empty(bl_sym, dtype=np.float64)\n", + " yield\n", + " \n", + " def group_boundaries(trigger_edges):\n", + " cur_edge = trigger_edges[0]\n", + "\n", + " for i in range(1, len(trigger_edges)):\n", + " next_edge = trigger_edges[i]\n", + " edge_diff = int(next_edge) - int(cur_edge)\n", + "\n", + " if edge_diff <= dead_time:\n", + " pass\n", + "\n", + " elif edge_diff > dead_time and edge_diff >= group_end:\n", + " yield cur_edge, int(cur_edge) + group_start, int(cur_edge) + group_end\n", + " cur_edge = trigger_edges[i]\n", + "\n", + " elif edge_diff > dead_time and edge_diff < group_end:\n", + " yield cur_edge, int(cur_edge) + group_start, int(next_edge)\n", + " cur_edge = trigger_edges[i]\n", + "\n", + " elif edge_diff < group_end:\n", + " pass\n", + "\n", + " yield cur_edge, int(cur_edge) + group_start, int(cur_edge) + group_end\n", + " \n", + " @psh.with_init(prepare_trigger_edge_worker)\n", + " def trigger_by_edge(worker_id, index, train_id, trace_raw):\n", + " correct_func(trace_raw, trace_corr, baselines, bl_start, bl_stop)\n", + "\n", + " pulse_offset = pulse_offsets[index]\n", + " pulse_count = pulse_counts[index]\n", + " num_triggers = discr_func(trace_corr, edge_pos, **discr_params[0])\n", + "\n", + " groups = group_boundaries(edge_pos[:num_triggers])\n", + " train_triggers = triggers[pulse_offset:pulse_offset+pulse_count]\n", + " \n", + " if num_triggers == 0 or num_triggers != pulse_count:\n", + " print(f'index={index}, train_id={train_id}: Unexpected '\n", + " f'num_triggers={num_triggers} for pulse_count={pulse_count}')\n", + " return\n", + "\n", + " for (edge, start, stop), pulse_trigger in zip(groups, train_triggers):\n", + " pulse_trigger['start'] = start\n", + " pulse_trigger['stop'] = stop\n", + " pulse_trigger['offset'] = start - edge\n", + " \n", + " with timing('find_triggers'):\n", + " psh.map(trigger_by_edge, keydata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, (lx, rx) = plt.subplots(num=2, ncols=2, nrows=1, figsize=(9, 4), clear=True,\n", + " gridspec_kw=dict(top=0.75))\n", + "\n", + "# Display ~400 pulses or 10 trains, whatever is lower\n", + "n_trains = max(abs(pulse_offsets - 400).argmin(), 10)\n", + "\n", + "visible_trigger_starts = triggers['start'][:pulse_offsets[n_trains]]\n", + "\n", + "lx.plot(visible_trigger_starts, '.', ms=2)\n", + "lx.vlines(pulse_offsets[:n_trains], 0, visible_trigger_starts.max(), color='grey', linewidth=1, alpha=0.2)\n", + "lx.tick_params(right=True)\n", + "\n", + "lx.set_xlabel('Pulse index')\n", + "lx.set_xlim(-15, pulse_offsets[n_trains]+15)\n", + "\n", + "lx.set_ylabel('Trigger position')\n", + "lx.set_ylim(0, visible_trigger_starts.max())\n", + "\n", + "train_lx = lx.twiny()\n", + "train_lx.set_xlabel('Train ID', labelpad=8)\n", + "train_lx.set_xlim(lx.get_xlim())\n", + "train_lx.set_xticks(pulse_offsets[:n_trains])\n", + "train_lx.set_xticklabels([str(int(x)) for x in dc.train_ids[:n_trains]],\n", + " rotation=-45, fontsize='x-small')\n", + "\n", + "rx.plot(triggers['start'], lw=0.2)\n", + "rx.tick_params(left=False, labelleft=False, right=True, labelright=True)\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analog signal to digital edges" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find edges in 
analog signal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_find_edges))\n", + "threadpool_limits(limits=remi.get_num_workers(mt_avg_trace))\n", + "\n", + "edges_by_det = {}\n", + "avg_traces_by_det = {}\n", + "\n", + "for det_name, det in remi['detector'].items():\n", + " det_sourcekeys = remi.get_detector_sourcekeys(det_name)\n", + " det_get_traces = remi.get_traces_getter(det_name)\n", + " trace_len = dc[next(iter(det_sourcekeys))].entry_shape[0]\n", + " \n", + " edges = psh.alloc(shape=(num_pulses, 7, det['max_hits']),\n", + " dtype=np.float64, fill=np.nan)\n", + " avg_traces = psh.alloc_per_worker(shape=(7, trace_len), dtype=np.float64)\n", + " \n", + " def prepare_edge_worker(worker_id):\n", + " correct_func = remi.get_baseline_corrector()\n", + " discr_func, discr_params = remi.get_discriminator(det['channels'])\n", + " \n", + " source_name = remi['digitizer']['source']\n", + " bl_start, bl_stop, _ = remi.get_baseline_limits(trace_len)\n", + " bl_sym = remi['digitizer']['baseline_symmetry']\n", + " time_cal = remi.get_time_calibration()\n", + " \n", + " traces_corr = np.empty((7, trace_len), dtype=np.float64)\n", + " baselines = np.empty(bl_sym, dtype=np.float64)\n", + " yield\n", + " \n", + " @psh.with_init(prepare_edge_worker)\n", + " def find_edges(worker_id, index, train_id, data):\n", + " try:\n", + " data = det_get_traces(data[source_name])\n", + " except KeyError:\n", + " return\n", + "\n", + " for channel_idx in range(7):\n", + " correct_func(data[channel_idx], traces_corr[channel_idx],\n", + " baselines, bl_start, bl_stop)\n", + "\n", + " avg_traces[worker_id] += traces_corr\n", + "\n", + " pulses_slice = np.s_[pulse_offsets[index]:pulse_offsets[index]+pulse_counts[index]]\n", + "\n", + " for trigger, pulse_edges in zip(triggers[pulses_slice], edges[pulses_slice]):\n", + " trigger_slice = np.s_[trigger['start']:trigger['stop']]\n", + " \n", + " for trace, channel_params, channel_edges in zip(traces_corr, discr_params, pulse_edges):\n", + " discr_func(trace[trigger_slice], channel_edges, **channel_params)\n", + "\n", + " pulse_edges += trigger['offset']\n", + " pulse_edges *= time_cal\n", + " \n", + " with timing(f'find_edges, {det_name}'):\n", + " psh.map(find_edges, dc.select(det_sourcekeys))\n", + " \n", + " edges_by_det[det_name] = edges\n", + " avg_traces_by_det[det_name] = avg_traces.sum(axis=0) / len(dc.train_ids)\n", + " \n", + " with np.printoptions(precision=2, suppress=True):\n", + " print(edges[:5, :, :8])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Global average of analog signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i, det_name in enumerate(remi['detector'].keys()):\n", + " fig, axs = plt.subplots(num=10+i, nrows=7, figsize=(9.5, 8), clear=True,\n", + " gridspec_kw=dict(left=0.1, right=0.98, top=0.98, bottom=0.1))\n", + " fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')\n", + "\n", + " for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):\n", + " axs[edge_idx].plot(avg_traces_by_det[det_name][edge_idx], lw=1)\n", + " axs[edge_idx].tick_params(labelbottom=False)\n", + " axs[edge_idx].set_ylabel(edge_name)\n", + " \n", + " axs[-1].tick_params(labelbottom=True)\n", + "pass" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### Sample for digitized traces" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i, det_name in enumerate(remi['detector'].keys()):\n", + " edges = edges_by_det[det_name]\n", + " \n", + " fig = plt.figure(num=100+i, figsize=(9.5, 8))\n", + " grid = fig.add_gridspec(ncols=2, nrows=4, left=0.1, right=0.98, top=0.98, bottom=0.1)\n", + "\n", + " fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')\n", + "\n", + " for signal_idx, signal_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):\n", + " row = (1 + signal_idx // 2) if signal_idx < 6 else 0\n", + " col = (signal_idx % 2) if signal_idx < 6 else np.s_[:]\n", + " ax = fig.add_subplot(grid[row, col])\n", + " \n", + " finite_edges = np.isfinite(edges[:, signal_idx, 0])\n", + " if not finite_edges.any():\n", + " continue\n", + " \n", + " pulse_idx = finite_edges.nonzero()[0][0]\n", + " train_idx = (pulse_idx >= pulse_offsets).nonzero()[0][-1]\n", + " trigger = triggers[pulse_idx]\n", + " \n", + " sourcekey = remi.get_channel_sourcekey(\n", + " remi['detector'][det_name]['channels'][signal_idx])\n", + " train_trace = dc[sourcekey].select_trains(np.s_[train_idx:train_idx+1]).ndarray()[0]\n", + " corr_trace = np.zeros_like(train_trace, dtype=np.float64)\n", + "\n", + " remi.get_baseline_corrector()(\n", + " train_trace, corr_trace,\n", + " np.empty(remi['digitizer']['baseline_symmetry'], dtype=np.float64),\n", + " *remi.get_baseline_limits(len(train_trace))[:2])\n", + " \n", + " pulse_trace = corr_trace[np.s_[trigger['start']:trigger['stop']]]\n", + " \n", + " x_time = remi.get_time_calibration() * (np.arange(len(pulse_trace)) + trigger['offset'])\n", + " \n", + " ax.plot(x_time, pulse_trace, lw=1)\n", + " ax.set_xlim(x_time[0], x_time[-1])\n", + " ax.set_ylim(-200, pulse_trace.max()*1.1)\n", + " ax.text(x_time[-1], pulse_trace.max(),\n", + " f'T{train_idx} P{pulse_idx - pulse_offsets[train_idx]} ',\n", + " va='top', ha='right')\n", + " ax.tick_params(labelbottom=False)\n", + " ax.set_ylabel(signal_name)\n", + " \n", + " ax.vlines(edges[pulse_idx, signal_idx, :], *ax.get_ylim(), color='red', linewidth=1)\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Digitized channel spectra" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "for i, det_name in enumerate(remi['detector'].keys()):\n", + " fig = plt.figure(num=20+i, figsize=(9.5, 6))\n", + " \n", + " edges = edges_by_det[det_name]\n", + " \n", + " min_edge = edges[np.isfinite(edges)].min()\n", + " max_edge = edges[np.isfinite(edges)].max()\n", + "\n", + " grid = fig.add_gridspec(ncols=3, nrows=3, left=0.08, right=0.98, top=0.95, hspace=0.4)\n", + "\n", + " fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')\n", + "\n", + " numx = fig.add_subplot(grid[0, 0])\n", + " numx.set_title('Edges per pulse')\n", + "\n", + " agg_window = num_pulses // 60\n", + " max_num_edges = 0.0\n", + " max_spectral_intensity = 0\n", + " hist_axs = []\n", + "\n", + " for edge_idx, edge_name in enumerate(['u1', 'u2', 'v1', 'v2', 'w1', 'w2', 'mcp']):\n", + " num_edges = np.isfinite(edges[:, edge_idx, :]).sum(axis=1)\n", + " num_edges = num_edges[:((len(num_edges) // agg_window) * agg_window)]\n", + " num_edges = num_edges.reshape(-1, agg_window).mean(axis=1)\n", + "\n", + " if edge_idx < 6:\n", + " 
plot_kwargs = dict(c=f'C{edge_idx}', ls='solid', lw=1.0)\n", + " else:\n", + " plot_kwargs = dict(c='k', ls='dashed', lw=1.0)\n", + "\n", + " numx.plot(np.arange(len(num_edges)) * agg_window, num_edges, label=edge_name, **plot_kwargs)\n", + " max_num_edges = max(max_num_edges, num_edges.max())\n", + "\n", + " cur_edges = edges[:, edge_idx, :].flatten()\n", + "\n", + " if edge_idx < 6:\n", + " row = 1 + edge_idx % 2\n", + " col = edge_idx // 2\n", + " else:\n", + " row = 0\n", + " col = np.s_[1:3]\n", + "\n", + " ax = fig.add_subplot(grid[row, col])\n", + " ax.set_title(f'TOF spectrum: {edge_name}')\n", + " y, _, _ = ax.hist(cur_edges[np.isfinite(cur_edges)], bins=int((max_edge - min_edge) // 5),\n", + " range=(min_edge, max_edge), color=plot_kwargs['c'], histtype='step', linewidth=1)\n", + " hist_axs.append(ax)\n", + "\n", + " max_spectral_intensity = max(max_spectral_intensity, y.max())\n", + "\n", + " numx.tick_params(labelbottom=False)\n", + " numx.set_ylim(0, 1.2*max_num_edges)\n", + "\n", + " for ax in hist_axs:\n", + " ax.set_ylim(0, max_spectral_intensity*1.1)\n", + " ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3))\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Detector diagnostics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "for i, det_name in enumerate(remi['detector'].keys()):\n", + " edges = edges_by_det[det_name]\n", + " \n", + " sort = remi.get_dld_sorter(det_name)\n", + " \n", + " sum_shifts = sort.sum_shifts if sort.sum_shifts != (0.0, 0.0, 0.0) else None\n", + " \n", + " is_valid = remi.get_presort_mask(edges, edge_idx=0,\n", + " sum_limit=max(sort.uncorrected_time_sum_half_widths),\n", + " sum_shifts=sum_shifts)\n", + " signals, sums = remi.get_signals_and_sums(edges, indices=sort.channel_indices, sum_shifts=sum_shifts,\n", + " mask=is_valid)\n", + " fig = plot_detector_diagnostics(signals=signals, sums=sums, fig_num=30+i, im_scale=1.5,\n", + " sum_range=max(sort.uncorrected_time_sum_half_widths),\n", + " sorter=sort)\n", + " fig.text(0.02, 0.98, det_name.upper() + ' before corrections', rotation=90, ha='left', va='top', size='x-large')\n", + " \n", + " if remi['detector'][det_name]['use_sum_correction'] or remi['detector'][det_name]['use_pos_correction']:\n", + " n_masked = is_valid.sum()\n", + " signals = np.full((n_masked, 3), np.nan, dtype=np.float64)\n", + " sums = np.full((n_masked, 3), np.nan, dtype=np.float64)\n", + "\n", + " sort.correct(edges[is_valid], signals, sums)\n", + " fig = plot_detector_diagnostics(signals=signals, sums=sums, fig_num=40+i, im_scale=1.5,\n", + " sum_range=max(sort.uncorrected_time_sum_half_widths),\n", + " sorter=sort)\n", + " fig.text(0.02, 0.98, det_name.upper() + ' after corrections', rotation=90, ha='left', va='top', size='x-large')\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hit reconstruction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "psh.set_default_context('processes', num_workers=remi.get_num_workers(mp_rec_hits))\n", + "\n", + "signals_by_det = {}\n", + "hits_by_det = {}\n", + "hit_counts_by_det = {}\n", + "\n", + "for det_name, det in remi['detector'].items():\n", + " edges = edges_by_det[det_name]\n", + " \n", + " signals = psh.alloc(shape=(num_pulses, 50), dtype=signal_dt, fill=np.nan)\n", + " hits = psh.alloc(shape=(num_pulses, 50), dtype=hit_dt, fill=(np.nan, 
np.nan, np.nan, -1))\n", + " hit_counts = psh.alloc(shape=len(dc.train_ids), dtype=np.uint32)\n", + "\n", + " def prepare_hit_worker(worker_id):\n", + " sort = remi.get_dld_sorter(det_name)\n", + " yield\n", + "\n", + " @psh.with_init(prepare_hit_worker)\n", + " def reconstruct_hits(worker_id, index, train_id):\n", + " hit_counts[index] += sort.run_on_train(\n", + " edges, signals, hits, pulse_offsets[index], pulse_offsets[index] + pulse_counts[index])\n", + " \n", + " with timing(f'rec_hits, {det_name}'):\n", + " psh.map(reconstruct_hits, dc.train_ids)\n", + " \n", + " signals_by_det[det_name] = signals\n", + " hits_by_det[det_name] = hits\n", + " hit_counts_by_det[det_name] = hit_counts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(num=50+i, figsize=(9.5, 4), ncols=1, clear=True,\n", + " gridspec_kw=dict(top=0.92, right=0.98, left=0.05, bottom=0.12))\n", + " \n", + "max_num_hits = 0.0\n", + " \n", + "for det_name in remi['detector'].keys():\n", + " agg_window = num_pulses // 1000\n", + " \n", + " num_hits = np.isfinite(hits_by_det[det_name]['x']).sum(axis=1)\n", + " num_hits = num_hits[:(len(num_hits) // agg_window) * agg_window]\n", + " num_hits = num_hits.reshape(-1, agg_window).mean(axis=1)\n", + " max_num_hits = max(max_num_hits, num_hits.max())\n", + " \n", + " ax.plot(np.arange(0, (num_pulses // agg_window) * agg_window, agg_window), num_hits,\n", + " lw=1, label=det_name.upper())\n", + " \n", + "ax.set_title('Hits per pulse')\n", + "ax.set_xlabel('Pulse index')\n", + "ax.set_ylim(0, max_num_hits*1.1)\n", + "ax.legend()\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reconstruction methods\n", + "Each hit may be reconstructed by one of 19 different methods. These differ by the number of real signals across the channels, which could be combined to form the hit. Each of these methods is designed by a number between `0` and `19` (with empty hits using `-1`), which can be found in the `m` key of a hit, e.g.:\n", + "\n", + "* `0`: All six anode signals and the corresponding MCP signal were found.\n", + "* `4`: One signal on layer `u` is missing, all other signals for this event were found.\n", + "* `18`: Only one anode signal on each layer was found and the MCP signal is missing. 
There is no way to check whether this combination of signals is actually valid.\n", + "\n", + "| Method | `u+v+w +mcp` |\n", + "| - | - |\n", + "| 0 | `2+2+2 +1` |\n", + "| 1 | `0+2+2 +1` |\n", + "| 2 | `2+0+2 +1` |\n", + "| 3 | `2+2+0 +1` |\n", + "| 4 | `1+2+2 +1` (2 permutations) |\n", + "| 5 | `2+1+2 +1` (2 permutations) |\n", + "| 6 | `2+2+1 +1` (2 permutations) |\n", + "| 7 | `2+2+2 +0` |\n", + "| 8 | `0+2+2 +0` |\n", + "| 9 | `2+0+2 +0` |\n", + "| 10 | `2+2+0 +0` |\n", + "| 11 | `1+2+2 +0` (2 permutations) |\n", + "| 12 | `2+1+2 +0` (2 permutations) |\n", + "| 13 | `2+2+1 +0` (2 permutations) |\n", + "| 14 | `2+1+1 +1` `1+2+1 +1` `1+1+2 +1` (12 permutations) |\n", + "| 15 | `2+1+0 +1` `2+0+1 +1` `1+2+0 +1` `1+0+2 +1` `0+2+1 +1` `0+1+2 +1` (12 permutations) |\n", + "| 16 | `1+1+1 +1` (8 permutations) |\n", + "| 17 | `2+1+1 +0` `1+2+1 +0` `1+1+2 +0` (12 permutations) |\n", + "| 18 | `1+1+1 +0` (8 permutations) |\n", + "| 19 | `2+1+0 +0` `2+0+1 +0` `1+2+0 +0` `1+0+2 +0` `0+1+2 +0` `0+2+1 +0` (12 permutations) |\n", + "\n", + "* For hits reconstructed with method `> 10`, extra attention should be given to ensure they add meaningful signal.\n", + "* Any method `> 14` has to considered risky, because neither a time sum nor the position can be checked. If the scale factors and/or `w` shift are not correct, then the number of events reconstructed with the risky methods will increase. They will most likely be *ghost hits*, which do not correspond to actual impacts on the detector." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "for i, det_name in enumerate(remi['detector'].keys()):\n", + " hits = hits_by_det[det_name]\n", + " \n", + " fig, ax = plt.subplots(num=60+i, figsize=(9.5, 5), ncols=1, clear=True,\n", + " gridspec_kw=dict(left=0.08, right=0.91, top=0.8))\n", + " \n", + " fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')\n", + "\n", + " method_bins = np.bincount(hits['m'][hits['m'] >= 0], minlength=20)\n", + " ax.bar(np.arange(20), method_bins, width=0.5)\n", + "\n", + " ax.set_xlabel('Reconstruction method')\n", + " ax.set_xlim(-0.5, 19.5)\n", + " ax.set_xticks(np.arange(20))\n", + "\n", + " ax.set_ylabel('Number of hits')\n", + " ax.set_ylim(0, method_bins.max()*1.05)\n", + " ylims = ax.get_ylim()\n", + "\n", + " ax.tick_params(which='both', right=True, labelright=True)\n", + "\n", + " num_risky = method_bins[15:].sum()\n", + " num_total = method_bins.sum()\n", + "\n", + " ax.text(14.2, method_bins.max(), f'{(100*(num_total-num_risky)/num_total):.2g}%',\n", + " va='top', ha='right', color='black')\n", + " ax.text(14.8, method_bins.max(), f'{(100*num_risky/num_total):.2g}%',\n", + " va='top', ha='left', color='red')\n", + "\n", + " ax.fill([14.5, 19.5, 19.5, 14.5], [ylims[0], ylims[0], ylims[1], ylims[1]], c='r', alpha=0.2)\n", + "\n", + " labelx = ax.twiny()\n", + " labelx.set_xlim(*ax.get_xlim())\n", + " labelx.set_xticks(ax.get_xticks())\n", + " labelx.set_xticklabels([\n", + " '2+2+2 +1',\n", + " '0+2+2 +1', '2+0+2 +1', '2+2+0 +1',\n", + " '1+2+2 +1', '2+1+2 +1', '2+2+1 +1',\n", + " '2+2+2 +0',\n", + " '0+2+2 +0', '2+0+2 +0', '2+2+0 +0', '1+2+2 +0', '2+1+2 +0', '2+2+1 +0',\n", + " '2+1+1 +1',\n", + " '2+1+0 +1',\n", + " '1+1+1 +1',\n", + " '2+1+1 +0',\n", + " '1+1+1 +0',\n", + " '2+1+0 +0',\n", + " ], rotation=90)\n", + " \n", + " min_rel_tick = np.ceil((ax.get_ylim()[0] / num_total) / 0.1) * 0.1\n", + " max_rel_tick = np.floor((method_bins.max() / 
num_total) / 0.1) * 0.1\n", + " \n", + " rely = ax.twinx()\n", + " rely.set_ylim(*ax.get_ylim())\n", + " rely.set_yticks(np.arange(0.0, max_rel_tick+0.01, 0.1)*num_total)\n", + " rely.set_yticks(np.arange(0.0, ylims[1]/num_total, 0.02)*num_total, minor=True)\n", + " rely.set_yticklabels([f'{(y/num_total)*100:.0f}%' for y in rely.get_yticks()])\n", + " rely.set_ylabel('Percentage of total hits')\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Detector image and fishes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "for i, det_name in enumerate(remi['detector'].keys()):\n", + " flat_hits = hits_by_det[det_name].reshape(-1)\n", + " flat_hits = flat_hits[np.isfinite(flat_hits[:]['x'])]\n", + " flat_hits = flat_hits[flat_hits['m'] < 10]\n", + "\n", + " fig = plt.figure(num=70+i, figsize=(9, 13.5))\n", + " \n", + " fig.text(0.02, 0.98, det_name.upper(), rotation=90, ha='left', va='top', size='x-large')\n", + " fig.text(0.02, 0.02, det_name.upper(), rotation=90, ha='left', va='bottom', size='x-large')\n", + " \n", + " imp = fig.add_axes([0.1 + 0.25/2, 0.56, 0.6, 0.4])\n", + " txp = fig.add_axes([0.1, 0.28, 0.85, 0.22])\n", + " typ = fig.add_axes([0.1, 0.04, 0.85, 0.22])\n", + " \n", + " im_radius = remi['detector'][det_name]['mcp_radius']*1.1\n", + " \n", + " min_tof = flat_hits['t'].min()\n", + " max_tof = flat_hits['t'].max()\n", + " \n", + " imp.hist2d(flat_hits['x'], flat_hits['y'], bins=(256, 256),\n", + " range=[[-im_radius, im_radius], [-im_radius, im_radius]], norm=LogNorm())\n", + " imp.xaxis.set_label_position('top')\n", + " imp.set_xlabel('X / mm')\n", + " imp.set_ylabel('Y / mm')\n", + " imp.tick_params(right=True, labelright=True, top=True, labeltop=True)\n", + " imp.grid()\n", + "\n", + " for ax, dim_label in zip([txp, typ], ['x', 'y']):\n", + " ax.hist2d(flat_hits['t'], flat_hits[dim_label], bins=(int((max_tof - min_tof) // 5), 256),\n", + " range=[[min_tof, max_tof], [-im_radius, im_radius]], norm=LogNorm())\n", + " ax.set_ylabel(f'{dim_label.upper()} / mm')\n", + " \n", + " typ.set_xlabel('Time-of-flight / ns')\n", + " txp.tick_params(bottom=True, labelbottom=False, top=True, labeltop=True, right=True, labelright=True)\n", + " typ.tick_params(right=True, labelright=True, top=True)\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Transformed data files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "inp_seq_len = len(dc.files[0].train_ids)\n", + "seq_len = out_seq_len if out_seq_len > 0 else inp_seq_len\n", + "\n", + "# Take first field for detector data as sample for naming\n", + "out_path = (Path(out_folder) / Path(out_filename)).resolve()\n", + "seq_len = 5000\n", + "\n", + "t_write = timing('write_files')\n", + "t_write.__enter__()\n", + "\n", + "out_path.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + "dataset_kwargs = {k[8:]: v for k, v in locals().items() if k.startswith('dataset_compression')}\n", + "\n", + "metadata = dc.run_metadata()\n", + "daq_library_bytes = metadata.get('daqLibrary', '0.0').encode('ascii')\n", + "karabo_framework_bytes = metadata.get('karaboFramework', '0.0').encode('ascii')\n", + "proposal_number = int(proposal) if proposal else metadata.get('proposalNumber', -1)\n", + "\n", + "print('Writing sequence files', flush=True, end='')\n", + "\n", + "for seq_id, start in 
enumerate(range(0, len(dc.train_ids), seq_len)):\n", + " train_ids = dc.train_ids[start:start+seq_len]\n", + " first_train_idx = start\n", + " final_train_idx = start + len(train_ids) - 1\n", + " \n", + " train_sel = np.s_[first_train_idx:final_train_idx+1]\n", + " pulse_sel = np.s_[pulse_offsets[first_train_idx]:(pulse_offsets[final_train_idx]+pulse_counts[final_train_idx])]\n", + "\n", + " with h5py.File(str(out_path).format(run=run, sequence=seq_id), 'w') as h5out:\n", + " h5out.create_dataset('INDEX/trainId', data=train_ids, dtype=np.uint64)\n", + " h5out.create_dataset('INDEX/timestamp', data=np.zeros_like(train_ids, dtype=np.uint64), dtype=np.uint64)\n", + " h5out.create_dataset('INDEX/flag', data=np.ones_like(train_ids, dtype=np.int32))\n", + " \n", + " m_data_sources = []\n", + " \n", + " for det_name in remi['detector']:\n", + " cur_device_id = det_device_id.format(karabo_id=karabo_id, det_name=det_name.upper())\n", + " cur_fast_data = f\"{cur_device_id}:{det_output_key}\"\n", + " \n", + " pipeline_prefixes = set()\n", + " \n", + " h5out.create_group(f'CONTROL/{cur_device_id}')\n", + " run_group = h5out.create_group(f'RUN/{cur_device_id}')\n", + " remi.attach_detector_config(det_name, run_group)\n", + " \n", + " h5out.create_group(f'INDEX/{cur_device_id}')\n", + " h5out.create_dataset(f'INDEX/{cur_device_id}/count',\n", + " data=np.ones_like(train_ids), dtype=np.uint64)\n", + " h5out.create_dataset(f'INDEX/{cur_device_id}/first',\n", + " data=np.arange(len(train_ids)), dtype=np.uint64)\n", + " \n", + " m_data_sources.append(('CONTROL', cur_device_id))\n", + " \n", + " for flag, data, chunks, path in [\n", + " (save_raw_triggers, triggers, chunks_triggers, 'raw/triggers'),\n", + " (save_raw_edges, edges_by_det[det_name], chunks_edges, 'raw/edges'),\n", + " (save_rec_signals, signals_by_det[det_name], chunks_signals, 'rec/signals'),\n", + " (save_rec_hits, hits_by_det[det_name], chunks_hits, 'rec/hits')\n", + " ]:\n", + " if not flag:\n", + " continue\n", + " \n", + " subdata = data[pulse_sel]\n", + "\n", + " kwargs = dict(data=subdata, chunks=tuple(chunks), **dataset_kwargs) \\\n", + " if len(subdata) > 0 else dict(shape=(0,), dtype=data.dtype)\n", + " \n", + " h5out.create_dataset(f'INSTRUMENT/{cur_fast_data}/{path}', **kwargs)\n", + " pipeline_prefixes.add(path[:path.find('/')])\n", + " \n", + " for pipeline_prefix in pipeline_prefixes:\n", + " h5out.create_dataset(f'INDEX/{cur_fast_data}/{pipeline_prefix}/count',\n", + " data=pulse_counts[train_sel])\n", + " h5out.create_dataset(f'INDEX/{cur_fast_data}/{pipeline_prefix}/first',\n", + " data=pulse_offsets[train_sel] - pulse_offsets[first_train_idx])\n", + " \n", + " m_data_sources.append(('INSTRUMENT', f'{cur_fast_data}/{pipeline_prefix}'))\n", + " \n", + " now_str = datetime.now().strftime('%Y%m%dT%H%M%SZ').encode('ascii')\n", + " \n", + " h5m = h5out.require_group('METADATA')\n", + " h5m.create_dataset('creationDate', shape=(1,), data=now_str)\n", + " h5m.create_dataset('daqLibrary', shape=(1,), data=daq_library_bytes)\n", + " h5m.create_dataset('dataFormatVersion', shape=(1,), data=b'1.0')\n", + " \n", + " m_data_sources.sort(key=lambda x: f'{x[0]}/{x[1]}')\n", + " h5m.create_dataset('dataSources/dataSourceId', shape=(len(m_data_sources),),\n", + " data=[f'{x[0]}/{x[1]}'.encode('ascii') for x in m_data_sources])\n", + " h5m.create_dataset('dataSources/deviceId', shape=(len(m_data_sources),),\n", + " data=[x[1].encode('ascii') for x in m_data_sources])\n", + " h5m.create_dataset('dataSources/root', 
shape=(len(m_data_sources),),\n", + " data=[x[0].encode('ascii') for x in m_data_sources])\n", + "\n", + " h5m.create_dataset('karaboFramework', shape=(1,), data=karabo_framework_bytes)\n", + " h5m.create_dataset('proposalNumber', shape=(1,), dtype=np.uint32, data=proposal_number)\n", + " h5m.create_dataset('runNumber', shape=(1,), dtype=np.uint32, data=run)\n", + " h5m.create_dataset('sequenceNumber', shape=(1,), dtype=np.uint32, data=seq_id)\n", + " h5m.create_dataset('updateDate', shape=(1,), data=now_str)\n", + " \n", + " print('.', flush=True, end='')\n", + " \n", + "print('')\n", + "t_write.__exit__()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sqs-remi", + "language": "python", + "name": "sqs-remi" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/ePix100/Correction_ePix100_NBC.ipynb b/notebooks/ePix100/Correction_ePix100_NBC.ipynb index 83ffb1e5983c8f2666be9b7f394ea54af5c83ad0..c241f8e270cdc3bbaffc18c13211915bdd562771 100644 --- a/notebooks/ePix100/Correction_ePix100_NBC.ipynb +++ b/notebooks/ePix100/Correction_ePix100_NBC.ipynb @@ -111,7 +111,7 @@ "metadata": {}, "outputs": [], "source": [ - "if absolute_gain :\n", + "if absolute_gain:\n", " relative_gain = True" ] }, @@ -291,10 +291,11 @@ "\n", "if relative_gain and const_data[\"RelativeGain\"] is None:\n", " print(\n", - " \"WARNING: RelativeGain map is requested, but not found./n\"\n", + " \"WARNING: RelativeGain map is requested, but not found.\\n\"\n", " \"No gain correction will be applied\"\n", " )\n", " relative_gain = False\n", + " absolute_gain = False\n", " plot_unit = 'ADU'" ] }, diff --git a/setup.py b/setup.py index 5b4bf926e8ce2ac197b36e8b3163da3894a661da..6e84ae0b0e2c2aab1c1aa4a70693903d2f531a5b 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ from distutils.command.build import build -from distutils.extension import Extension from subprocess import check_output import numpy from Cython.Build import cythonize from Cython.Distutils import build_ext +from setuptools.extension import Extension from setuptools import find_packages, setup from src.xfel_calibrate.notebooks import notebooks @@ -71,21 +71,22 @@ setup( "build": PreInstallCommand, "build_ext": build_ext, }, - ext_modules=cythonize(ext_modules), + ext_modules=cythonize(ext_modules, language_level=3), install_requires=[ "iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.0.9", # noqa - "XFELDetectorAnalysis @ git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git@2.5.6-2.10.0#subdirectory=lib", # noqa + "XFELDetectorAnalysis @ git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git@2.7.0", # noqa "Cython==0.29.21", "Jinja2==2.11.2", "astcheck==0.2.5", "astsearch==0.2.0", "cfelpyutils==1.0.1", "dill==0.3.0", + "docutils==0.17.1", "dynaconf==3.1.4", "extra_data==1.8.0", "extra_geom==1.6.0", "gitpython==3.1.0", - "h5py==3.3.0", + "h5py==3.5.0", "iminuit==1.3.8", "ipykernel==5.1.4", "ipyparallel==6.2.4", @@ -107,7 +108,7 @@ setup( "numpy==1.20.3", "pasha==0.1.0", "prettytable==0.7.2", - "princess==0.4", + "princess==0.5", "pypandoc==1.4", "python-dateutil==2.8.1", "pyyaml==5.3", @@ -139,7 +140,7 @@ setup( "pre-commit", ], }, - python_requires=">=3.6", + python_requires=">=3.8", classifiers=[ "Development Status :: 5 - 
Production/Stable", "Environment :: Console", diff --git a/src/cal_tools/agipdalgs.pyx b/src/cal_tools/agipdalgs.pyx index cacc0f549bbce04b4d25b74874488b0ce3cf6f3e..954d63d7055cbccc176c885681cdce354519f672 100644 --- a/src/cal_tools/agipdalgs.pyx +++ b/src/cal_tools/agipdalgs.pyx @@ -1,11 +1,56 @@ import numpy as np -cimport cython +from cython cimport boundscheck, wraparound +from cython.view cimport contiguous +from cython.parallel import prange cimport numpy as cnp -@cython.boundscheck(False) -@cython.wraparound(False) +# Separate fused types for input and output to allow for the +# combination to occur. +ctypedef fused inp_t: + int + unsigned int + float + double + +ctypedef fused outp_t: + int + unsigned int + float + double + + +@wraparound(False) +@boundscheck(False) +def transpose_constant(outp_t[:, :, :, ::contiguous] dst, + inp_t[:, :, :, ::contiguous] src, + int num_threads=0): + """ + Optimized and parallelized constant transposition. + + Will work for any 4D array, but the iteration order is handpicked + to be fast for AGIPD constants. + """ + + if not (dst.shape[0] == src.shape[3] and dst.shape[1] == src.shape[2] and + dst.shape[2] == src.shape[1] and dst.shape[3] == src.shape[0]): + raise ValueError(f'dst shape {np.array(dst.shape)[:4]} must be ' + f'reversed src shape {np.array(src.shape)[:4]}') + + cdef int y, x, c, g + + # Optimized iteration (on INTEL) order via profiling, do not change + # and don't ask me why! + for c in prange(src.shape[2], nogil=True, num_threads=num_threads): + for y in range(src.shape[0]): + for x in range(src.shape[1]): + for g in range(src.shape[3]): + dst[g, c, x, y] = <outp_t>src[y, x, c, g] + + +@boundscheck(False) +@wraparound(False) def histogram(cnp.ndarray[cnp.float32_t, ndim=2] data, range=(0,1), int bins=20, cnp.ndarray[cnp.float32_t, ndim=2] weights=None): """ @@ -38,8 +83,8 @@ def histogram(cnp.ndarray[cnp.float32_t, ndim=2] data, range=(0,1), int bins=20, return ret, np.linspace(min, max, bins+1) -@cython.boundscheck(False) -@cython.wraparound(False) +@boundscheck(False) +@wraparound(False) def histogram2d(cnp.ndarray[float, ndim=1] data_x, cnp.ndarray[float, ndim=1] data_y, range=((0,1), (0,1)), bins=(20, 20)): """ @@ -75,8 +120,8 @@ def histogram2d(cnp.ndarray[float, ndim=1] data_x, cnp.ndarray[float, ndim=1] da return ret, np.linspace(min_x, max_x, bins_x+1), np.linspace(min_y, max_y, bins_y+1) -@cython.boundscheck(False) -@cython.wraparound(False) +@boundscheck(False) +@wraparound(False) def gain_choose(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.float32_t, ndim=4] choices): """Specialised fast equivalent of np.choose(), to select data for a per-pixel gain stage""" cdef int i, j, k @@ -96,8 +141,8 @@ def gain_choose(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.float32_t, n return out -@cython.boundscheck(False) -@cython.wraparound(False) +@boundscheck(False) +@wraparound(False) def gain_choose_int(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.int32_t, ndim=4] choices): """Specialised fast equivalent of np.choose(), to select data for a per-pixel gain stage""" cdef int i, j, k @@ -117,8 +162,8 @@ def gain_choose_int(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.int32_t, return out -@cython.boundscheck(False) -@cython.wraparound(False) +@boundscheck(False) +@wraparound(False) def sum_and_count_in_range_asic(cnp.ndarray[float, ndim=4] arr, float lower, float upper): """ Return the sum & count of values where lower <= x <= upper, @@ -148,8 +193,8 @@ def sum_and_count_in_range_asic(cnp.ndarray[float, ndim=4] 
arr, float lower, flo return sum_, count -@cython.boundscheck(False) -@cython.wraparound(False) +@boundscheck(False) +@wraparound(False) def sum_and_count_in_range_cell(cnp.ndarray[float, ndim=4] arr, float lower, float upper): """ Return the sum & count of values where lower <= x <= upper, diff --git a/src/cal_tools/agipdlib.py b/src/cal_tools/agipdlib.py index 121c120c81d12c171c6c0be2679cc1a7ade8415f..ec6b4e25c7fe896a6e5b7fa2b99408ccb12ae627 100644 --- a/src/cal_tools/agipdlib.py +++ b/src/cal_tools/agipdlib.py @@ -1,3 +1,4 @@ +import os import posixpath import traceback import zlib @@ -8,6 +9,7 @@ from typing import Any, Dict, List, Optional, Tuple import h5py import numpy as np import sharedmem +from extra_data import DataCollection from iCalibrationDB import Conditions, Constants from cal_tools import agipdalgs as calgs @@ -185,25 +187,61 @@ def get_integration_time(fname: str, h5path_ctrl: str) -> int: return 12 +class CellSelection: + """Selection of detector memory cells (abstract class)""" + row_size = 32 + ncell_max = 352 + CM_NONE = 0 + CM_PRESEL = 1 + CM_FINSEL = 2 + + def get_cells_on_trains( + self, trains_sel: List[int], cm: int = 0 + ) -> np.array: + """Returns mask of cells selected for processing + + :param train_sel: list of a train ids selected for processing + :param cm: flag indicates the final selection or interim selection + for common-mode correction + """ + raise NotImplementedError + + def msg(self): + """Return log message on initialization""" + raise NotImplementedError + + @staticmethod + def _sel_for_cm(flag, flag_cm, cm): + if cm == CellSelection.CM_NONE: + return flag + elif cm == CellSelection.CM_PRESEL: + return flag_cm + elif cm == CellSelection.CM_FINSEL: + return flag[flag_cm] + else: + raise ValueError("param 'cm' takes only 0,1,2") + + class AgipdCorrections: def __init__( self, - max_cells, - max_pulses, - h5_data_path="INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", - h5_index_path="INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", + max_cells: int, + cell_sel: CellSelection, + h5_data_path: str = "INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", + h5_index_path: str = "INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", corr_bools: Optional[dict] = None, gain_mode: AgipdGainMode = AgipdGainMode.ADAPTIVE_GAIN, - comp_threads=1, + comp_threads: int = 1, + train_ids: Optional[np.ndarray] = None ): """ Initialize an AgipdCorrections Class :param max_cells: maximum number of memory cells to handle, e.g. if calibration constants only exist for a subset of cells - :param max_pulses: a range list of pulse indices used for - calibration and histogram. [start, end, step] + :param cell_sel: the CellSelection indicates cells selected for + calibration :param h5_data_path: path in HDF5 file which is prefixed to the image/data section :param h5_index_path: path in HDF5 file which is prefixed to the @@ -211,11 +249,13 @@ class AgipdCorrections: :param corr_bools: A dict with all of the correction booleans requested or available :param comp_threads: Number of threads to use for compressing gain/mask + :param train_ids: train IDs to process, all if omitted. The following example shows a typical use case: .. 
code-block:: python - agipd_corr = AgipdCorrections(max_cells, max_pulses, + cell_sel = CellRange(max_pulses, max_cells) + agipd_corr = AgipdCorrections(max_cells, cell_sel, h5_data_path=h5path, h5_index_path=h5path_idx, corr_bools=corr_bools) @@ -251,9 +291,9 @@ class AgipdCorrections: self.max_cells = max_cells self.gain_mode = gain_mode self.comp_threads = comp_threads + self.train_ids = np.array(train_ids) if train_ids is not None else None - self.start, self.last, self.step = self._validate_selected_pulses( - max_pulses, max_cells) + self.cell_sel = cell_sel # Correction parameters self.baseline_corr_noise_threshold = -1000 @@ -326,18 +366,42 @@ class AgipdCorrections: data_dict['moduleIdx'][0] = module_idx try: f = h5py.File(file_name, "r") - group = f[agipd_base]["image"] - (_, first_index, last_index, - _, valid_indices) = self.get_valid_image_idx(idx_base, f) + (valid, first_index, last_index, + train_ids, valid_indices) = self.get_valid_image_idx(idx_base, f) + + if len(valid_indices) == 0: + # If there's not a single valid index, exit early. + data_dict['nImg'][0] = 0 + return 0 + + # store valid trains in shared memory + valid_train_ids = train_ids[valid] + n_valid_trains = len(valid_train_ids) + data_dict["n_valid_trains"][0] = n_valid_trains + data_dict["valid_trains"][:n_valid_trains] = valid_train_ids + + # get cell selection for the images in this file + cm = (self.cell_sel.CM_NONE if apply_sel_pulses + else self.cell_sel.CM_PRESEL) + img_selected = self.cell_sel.get_cells_on_trains( + valid_train_ids, cm=cm) + data_dict["cm_presel"][0] = (cm == self.cell_sel.CM_PRESEL) + group = f[agipd_base]['image'] allcells = np.squeeze(group['cellId']) allpulses = np.squeeze(group['pulseId']) firange = self.gen_valid_range(first_index, last_index, self.max_cells, allcells, allpulses, valid_indices, - apply_sel_pulses) + img_selected) + + if firange is None: + # gen_valid_range() returns None if there are no cells + # to correct, exit early. + data_dict['nImg'][0] = 0 + return 0 n_img = firange.shape[0] data_dict['nImg'][0] = n_img @@ -475,6 +539,8 @@ class AgipdCorrections: fraction = self.cm_dark_fraction n_itr = self.cm_n_itr n_img = self.shared_dict[i_proc]['nImg'][0] + if n_img == 0: + return cell_id = self.shared_dict[i_proc]['cellId'][:n_img] train_id = self.shared_dict[i_proc]['trainId'][:n_img] cell_ids = cell_id[train_id == train_id[0]] @@ -760,21 +826,34 @@ class AgipdCorrections: ): """Return the indices of valid data""" if raw_format_version == 2: + idxtrains = np.squeeze(infile['/INDEX/trainId']) + + # Check against train ID filter list, if any + if self.train_ids is not None: + valid = np.in1d(idxtrains, self.train_ids) + + if not valid.any(): + # Shortcut to avoid any further loading. + return valid, 0, 0, idxtrains, np.zeros(0, dtype=np.int32) + else: + valid = np.ones_like(idxtrains, dtype=bool) + + # Load count and offsets and filter for non-emtpy trains. 
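# Illustrative sketch: the train-selection logic this hunk introduces in
# get_valid_image_idx, reduced to plain NumPy. All values and variable names
# below are toy placeholders, not the real HDF5 layout.
import numpy as np

idxtrains = np.array([1000, 1001, 1002, 1003, 1004], dtype=np.uint64)  # stand-in for /INDEX/trainId
count = np.array([64, 0, 64, 64, 64])                                  # images recorded per train
train_filter = np.array([1002, 1003, 9999], dtype=np.uint64)           # e.g. PPU-picked train IDs

valid = np.in1d(idxtrains, train_filter)   # keep only trains in the filter list
valid &= count != 0                        # drop trains without images
median_train = np.median(idxtrains)        # sanity bound against corrupt train IDs
valid &= (idxtrains > median_train - 1e4) & (idxtrains < median_train + 1e4)

print(idxtrains[valid])  # -> [1002 1003]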
count = np.squeeze(infile[idx_base + "image/count"]) first = np.squeeze(infile[idx_base + "image/first"]) - if np.count_nonzero(count != 0) == 0: - raise IOError("File has no valid counts") - valid = count != 0 - idxtrains = np.squeeze(infile["/INDEX/trainId"]) + valid &= count != 0 - # Train indices are of type=f32 # Validate that train indices values fall # between medianTrain +- 1e4 - medianTrain = np.nanmedian(idxtrains) + medianTrain = np.median(idxtrains) lowok = (idxtrains > medianTrain - 1e4) highok = (idxtrains < medianTrain + 1e4) valid &= lowok & highok + if not valid.any(): + # Shortcut if no valid trains are left. + return valid, 0, 0, idxtrains, np.zeros(0, dtype=np.int32) + # Last index = last valid train + max. number of memory cells last_index = int(first[valid][-1] + count[valid][-1]) first_index = int(first[valid][0]) @@ -819,20 +898,24 @@ class AgipdCorrections: """Select sharedmem data indices to correct based on selected pulses indices. - :param i_proc: the index of sharedmem for a given file/module :return n_img: number of images to correct """ - data_dict = self.shared_dict[i_proc] n_img = data_dict['nImg'][0] - allpulses = data_dict['pulseId'][:n_img] + if not data_dict["cm_presel"][0]: + return n_img + + ntrains = data_dict["n_valid_trains"][0] + train_ids = data_dict["valid_trains"][:ntrains] # Initializing can_calibrate array - can_calibrate = self.choose_selected_pulses( - allpulses, - can_calibrate=np.ones(shape=(len(allpulses),), dtype=np.bool)) + can_calibrate = self.cell_sel.get_cells_on_trains( + train_ids, cm=self.cell_sel.CM_FINSEL + ) + if np.all(can_calibrate): + return n_img # Only select data corresponding to selected pulses # and overwrite data in shared-memory leaving @@ -856,99 +939,11 @@ class AgipdCorrections: return n_img - def _validate_selected_pulses( - self, max_pulses: List[int], - max_cells: int, - ) -> List[int]: - """Validate the selected pulses given from the notebook - - Validate the selected range of pulses to correct raw data - of at least one image. - - 1) A pulse indices within one train can't be greater - than the operating memory cells. - 2) Validate the order of the given raneg of pulses. - 3) Raise value error if generate list of pulses is empty. - - :param max_pulses: a list of at most 3 elements defining the - range of pulses to calibrate. - :param max_cells: operating memory cells. - - :return adjusted_range: An adjusted range of pulse indices to correct. - """ - - # Validate selected pulses range: - # 1) A pulseId can't be greater than the operating memory cells. - pulses_range = [max_cells if p > max_cells else p for p in max_pulses] - - if pulses_range != max_pulses: - print( - "WARNING: \"max_pulses\" list has been modified from " - f"{max_pulses} to {pulses_range}. As the number of " - "operating memory cells are less than the selected " - "maximum pulse." - ) - - if len(pulses_range) == 1: - adjusted_range = (0, pulses_range[0], 1) - elif len(pulses_range) == 2: - adjusted_range = (pulses_range[0], pulses_range[1], 1) - elif len(pulses_range) == 3: - adjusted_range = tuple(pulses_range) - else: - raise ValueError( - "ERROR: Wrong length for the list of pulses indices range. " - "Please check the given range for \"max_pulses\":" - f"{max_pulses}. \"max_pulses\" needs to be a list of " - "3 elements, [start, last, step]") - - if adjusted_range[0] > adjusted_range[1]: - raise ValueError( - "ERROR: Pulse range start is greater than range end. " - "Please check the given range for \"max_pulses\":" - f"{max_pulses}. 
\"max_pulses\" needs to be a list of " - "3 elements, [start, last, step]") - - if not np.all([isinstance(p, int) for p in max_pulses]): - raise TypeError( - "ERROR: \"max_pulses\" elements needs to be integers:" - f" {max_pulses}.") - - print( - "A range of pulse indices is selected to correct:" - f" {pulses_range}") - - return adjusted_range - - def choose_selected_pulses(self, allpulses: np.array, - can_calibrate: np.array) -> np.array: - """ - Choose given selected pulse from pulseId array of - raw data. The selected pulses range is validated then - used to add a booleans in can_calibrate and guide the - later appliance. - - - :param allpulses: array of pulseIds from raw data - :param can_calibrate: array of booleans for indices to calibrate - :return can_calibrate: array of booleans to calibrate depending on - selected pulses - """ - - # Check interesection between array of booleans and - # array of pulses to calibrate. - can_calibrate &= ( - (allpulses >= allpulses[self.start]) & - (allpulses <= allpulses[self.last-1]) & - (((allpulses - allpulses[self.start]) % allpulses[self.step]) == 0) # noqa - ) - return can_calibrate - def gen_valid_range(self, first_index: int, last_index: int, max_cells: int, allcells: np.array, allpulses: np.array, valid_indices: Optional[np.array] = None, - apply_sel_pulses: Optional[bool] = True + img_selected: Optional[np.array] = None, ) -> np.array: """ Validate the arrays of image.cellId and image.pulseId to check presence of data and to avoid empty trains. @@ -962,12 +957,10 @@ class AgipdCorrections: :param allcells: array of image.cellsIds of raw data :param allpulses: array of image.pulseIds of raw data :param valid_indices: validated indices of image.data - :param apply_sel_pulses: A flag for applying selected pulses - after validation for correction + :param img_selected: mask of selected cells for given + range of trains :return firange: An array of validated image.data indices to correct - # TODO: Ignore rows (32 pulse) of empty pulses even if - common-mode is selected """ if valid_indices is not None: @@ -979,12 +972,12 @@ class AgipdCorrections: can_calibrate = (allcells < max_cells) + if img_selected is not None: + can_calibrate &= img_selected + if not np.any(can_calibrate): return - if apply_sel_pulses: - can_calibrate = self.choose_selected_pulses( - allpulses, can_calibrate=can_calibrate) if valid_indices is None: firange = np.arange(first_index, last_index) else: @@ -1143,10 +1136,16 @@ class AgipdCorrections: :return: """ - self.offset[module_idx][...] = cons_data["Offset"].transpose()[...] - self.noise[module_idx][...] = cons_data["Noise"].transpose()[...] + # Distribute threads for transposition evenly across all modules + # assuming this method runs in parallel. + calgs_opts = dict(num_threads=os.cpu_count() // len(self.offset)) + + calgs.transpose_constant(self.offset[module_idx], cons_data['Offset'], **calgs_opts) + calgs.transpose_constant(self.noise[module_idx], cons_data['Noise'], **calgs_opts) if self.gain_mode is AgipdGainMode.ADAPTIVE_GAIN: - self.thresholds[module_idx][...] = cons_data["ThresholdsDark"].transpose()[:3,...] 
# noqa + calgs.transpose_constant(self.thresholds[module_idx], + cons_data['ThresholdsDark'][..., :3], + **calgs_opts) if self.corr_bools.get("low_medium_gap"): t0 = self.thresholds[module_idx][0] @@ -1208,7 +1207,7 @@ class AgipdCorrections: rel_gain = np.ones((128, 512, self.max_cells, 3), np.float32) if when["SlopesPC"]: - slopesPC = cons_data["SlopesPC"].astype(np.float32) + slopesPC = cons_data["SlopesPC"].astype(np.float32, copy=False) # This will handle some historical data in a different format # constant dimension injected first @@ -1239,21 +1238,21 @@ class AgipdCorrections: # are fixed to the median value. # This is applied for high and medium gain stages for i in range(self.max_cells): - pc_high_m[np.isnan(pc_high_m[..., i])] = pc_high_med[i] - pc_med_m[np.isnan(pc_med_m[..., i])] = pc_med_med[i] + pc_high_m[np.isnan(pc_high_m[..., i]), i] = pc_high_med[i] + pc_med_m[np.isnan(pc_med_m[..., i]), i] = pc_med_med[i] - pc_high_l[np.isnan(pc_high_l[..., i])] = pc_high_l_med[i] - pc_med_l[np.isnan(pc_med_l[..., i])] = pc_med_l_med[i] + pc_high_l[np.isnan(pc_high_l[..., i]), i] = pc_high_l_med[i] + pc_med_l[np.isnan(pc_med_l[..., i]), i] = pc_med_l_med[i] pc_high_m[(pc_high_m[..., i] < 0.8 * pc_high_med[i]) | - (pc_high_m[..., i] > 1.2 * pc_high_med[i])] = pc_high_med[i] # noqa + (pc_high_m[..., i] > 1.2 * pc_high_med[i]), i] = pc_high_med[i] # noqa pc_med_m[(pc_med_m[..., i] < 0.8 * pc_med_med[i]) | - (pc_med_m[..., i] > 1.2 * pc_med_med[i])] = pc_med_med[i] # noqa + (pc_med_m[..., i] > 1.2 * pc_med_med[i]), i] = pc_med_med[i] # noqa pc_high_l[(pc_high_l[..., i] < 0.8 * pc_high_l_med[i]) | - (pc_high_l[..., i] > 1.2 * pc_high_l_med[i])] = pc_high_l_med[i] # noqa + (pc_high_l[..., i] > 1.2 * pc_high_l_med[i]), i] = pc_high_l_med[i] # noqa pc_med_l[(pc_med_l[..., i] < 0.8 * pc_med_l_med[i]) | - (pc_med_l[..., i] > 1.2 * pc_med_l_med[i])] = pc_med_l_med[i] # noqa + (pc_med_l[..., i] > 1.2 * pc_med_l_med[i]), i] = pc_med_l_med[i] # noqa # ration between HG and MG per pixel per mem cell used # for rel gain calculation @@ -1283,10 +1282,10 @@ class AgipdCorrections: frac_high_med = np.ones((self.max_cells,), np.float32) self.md_additional_offset[module_idx][...] = md_additional_offset.transpose()[...] # noqa - self.rel_gain[module_idx][...] = rel_gain[...].transpose() + calgs.transpose_constant(self.rel_gain[module_idx], rel_gain, **calgs_opts) self.frac_high_med[module_idx][...] = frac_high_med - self.mask[module_idx][...] = bpixels.transpose()[...] 
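# Illustrative sketch: a pure-NumPy reference for what calgs.transpose_constant
# (defined in the agipdalgs.pyx hunk above) computes on these constant arrays;
# the Cython version additionally parallelizes the copy with prange. The _ref
# name and the shapes below are made up for illustration.
import numpy as np

def transpose_constant_ref(dst, src):
    # dst must have the reversed shape of src; it receives src with all axes
    # reversed (i.e. src.transpose()), cast to dst's dtype on assignment.
    if dst.shape != src.shape[::-1]:
        raise ValueError(f"dst shape {dst.shape} must be reversed src shape {src.shape}")
    dst[...] = src.transpose()

src = np.arange(2 * 3 * 4 * 5, dtype=np.float64).reshape(2, 3, 4, 5)
dst = np.empty((5, 4, 3, 2), dtype=np.float32)
transpose_constant_ref(dst, src)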
+ calgs.transpose_constant(self.mask[module_idx], bpixels, **calgs_opts) return @@ -1432,7 +1431,6 @@ class AgipdCorrections: :param shape: Shape of expected data (nImg, x, y) :param n_cores_files: Number of files, handled in parallel """ - self.shared_dict = [] for i in range(n_cores_files): self.shared_dict.append({}) @@ -1451,3 +1449,201 @@ class AgipdCorrections: self.shared_dict[i]["raw_data"] = sharedmem.empty(shape, dtype="f4") # noqa self.shared_dict[i]["rel_corr"] = sharedmem.empty(shape, dtype="f4") # noqa self.shared_dict[i]["t0_rgain"] = sharedmem.empty(shape, dtype="u2") # noqa + # Valid trains + self.shared_dict[i]["cm_presel"] = sharedmem.empty(1, dtype="b") + self.shared_dict[i]["n_valid_trains"] = sharedmem.empty(1, dtype="i4") # noqa + self.shared_dict[i]["valid_trains"] = sharedmem.empty(1024, dtype="u8") # noqa + + +def validate_selected_pulses( + max_pulses: List[int], max_cells: int +) -> List[int]: + """Validate the selected pulses given from the notebook + + Validate the selected range of pulses to correct raw data + of at least one image. + + 1) A pulse indices within one train can't be greater + than the operating memory cells. + 2) Validate the order of the given raneg of pulses. + 3) Raise value error if generate list of pulses is empty. + + :param max_pulses: a list of at most 3 elements defining the + range of pulses to calibrate. + :param max_cells: operating memory cells. + + :return adjusted_range: An adjusted range of pulse indices to correct. + """ + + # Validate selected pulses range: + # 1) A pulseId can't be greater than the operating memory cells. + pulses_range = [max_cells if p > max_cells else p for p in max_pulses] + + if pulses_range != max_pulses: + print( + "WARNING: \"max_pulses\" list has been modified from " + f"{max_pulses} to {pulses_range}. As the number of " + "operating memory cells are less than the selected " + "maximum pulse." + ) + + if len(pulses_range) == 1: + adjusted_range = (0, pulses_range[0], 1) + elif len(pulses_range) == 2: + adjusted_range = (pulses_range[0], pulses_range[1], 1) + elif len(pulses_range) == 3: + adjusted_range = tuple(pulses_range) + else: + raise ValueError( + "ERROR: Wrong length for the list of pulses indices range. " + "Please check the given range for \"max_pulses\":" + f"{max_pulses}. \"max_pulses\" needs to be a list of " + "3 elements, [start, last, step]") + + if adjusted_range[0] > adjusted_range[1]: + raise ValueError( + "ERROR: Pulse range start is greater than range end. " + "Please check the given range for \"max_pulses\":" + f"{max_pulses}. 
\"max_pulses\" needs to be a list of " + "3 elements, [start, last, step]") + + if not np.all([isinstance(p, int) for p in max_pulses]): + raise TypeError( + "ERROR: \"max_pulses\" elements needs to be integers:" + f" {max_pulses}.") + + print( + "A range of pulse indices is selected to correct:" + f" {pulses_range}") + + return adjusted_range + + +class CellRange(CellSelection): + """Selection of detector memory cells given as a range""" + + def __init__(self, crange: List[int], max_cells: int): + """Initialize range selection + + :param crange: range parameters of selected cells, + list up to 3 elements + :param max_cells: number of exposed cells + """ + self.max_cells = max_cells + self.crange = validate_selected_pulses(crange, max_cells) + self.flag = np.zeros(self.max_cells, bool) + self.flag_cm = np.zeros(self.ncell_max, bool) + self.flag[slice(*crange)] = True + self.flag_cm[:self.max_cells] = self.flag + self.flag_cm = (self.flag_cm.reshape(-1, self.row_size).any(1) + .repeat(self.row_size)[:self.max_cells]) + + def msg(self): + return ( + f"Use range selection with crange={self.crange}, " + f"max_cells={self.max_cells}\n" + f"Frames per train: {self.flag.sum()}" + ) + + def get_cells_on_trains( + self, train_sel: List[int], cm: int = 0 + ) -> np.array: + return np.tile(self._sel_for_cm(self.flag, self.flag_cm, cm), + len(train_sel)) + + +class LitFrameSelection(CellSelection): + """Selection of detector memery cells indicated as lit frames + by the AgipdLitFrameFinder + """ + def __init__(self, dev: str, dc: DataCollection, train_ids: List[int], + crange: Optional[List[int]] = None, + energy_threshold: Optional[float] = None): + """Initialize lit frame selection + + :param dev: AgipdLitFrameFinder device name + :param dc: EXtra-data DataCollection of a run + :param train_ids: the list of selected trains + :param crange: range parameters of selected cells, + list up to 3 elements + """ + # read AgipdLitFrameFinder data + self.dev = dev + self.crange = validate_selected_pulses(crange, self.ncell_max) + self.ethr = energy_threshold + intr_src = dev + ':output' + nfrm = dc[intr_src, 'data.nFrame'].ndarray() + litfrm_train_ids = dc[intr_src, 'data.trainId'].ndarray() + litfrm = dc[intr_src, 'data.nPulsePerFrame'].ndarray() > 0 + if (energy_threshold != -1000 and + 'data.energyPerFrame' in dc.keys_for_source(intr_src)): + + litfrm &= (dc[intr_src, 'data.energyPerFrame'].ndarray() + > energy_threshold) + + # apply range selection + if crange is None: + cellsel = np.ones(self.ncell_max, bool) + else: + cellsel = np.zeros(self.ncell_max, bool) + cellsel[slice(*crange)] = True + + # update train selection, removing trains without litframe data + if train_ids is None: + train_ids = np.unique(litfrm_train_ids) + ntrain = len(train_ids) + else: + ntrain_orig = len(train_ids) + train_ids, _, litfrm_train_idx = np.intersect1d( + train_ids, litfrm_train_ids, return_indices=True + ) + litfrm = litfrm[litfrm_train_idx] + nfrm = nfrm[litfrm_train_idx] + ntrain = len(train_ids) + if ntrain != ntrain_orig: + print(f"Lit frame data missed for {ntrain_orig - ntrain}. 
" + "Skip them.") + + # initiate instance attributes + nimg = np.sum(nfrm) + self.train_ids = train_ids + self.count = nfrm + self.first = np.zeros(ntrain, int) + self.flag = np.zeros(nimg, bool) + self.flag_cm = np.zeros(nimg, bool) + self.min_sel = nfrm.max() + self.max_sel = 0 + # compress frame pattern + i0 = 0 + for i in range(ntrain): + n = nfrm[i] + iN = i0 + n + trnflg = litfrm[i] & cellsel + trnflg[n:] = False + self.first[i] = i0 + self.flag[i0:iN] = trnflg[:n] + self.flag_cm[i0:iN] = (trnflg.reshape(-1, self.row_size).any(1) + .repeat(self.row_size)[:n]) + nlit = trnflg[:n].sum() + self.min_sel = min(self.min_sel, nlit) + self.max_sel = max(self.max_sel, nlit) + i0 = iN + + def msg(self): + srng = (f"{self.min_sel}" if self.min_sel == self.max_sel + else f"{self.min_sel}~{self.max_sel}") + return ( + f"Use lit frame selection from {self.dev}, crange={self.crange}\n" + f"Frames per train: {srng}" + ) + + def get_cells_on_trains( + self, train_sel: List[int], cm: int = 0 + ) -> np.array: + train_idx = np.flatnonzero(np.in1d(self.train_ids, train_sel)) + i0 = self.first[train_idx] + iN = i0 + self.count[train_idx] + idx = np.concatenate( + [np.arange(i0[i], iN[i], dtype=int) for i in range(train_idx.size)] + ) + return self._sel_for_cm(self.flag[idx], self.flag_cm[idx], cm) diff --git a/src/cal_tools/plotting.py b/src/cal_tools/plotting.py index a7907f9264e45c918f6b34847e66c17f7edb7a3c..fcafaffad2e771f4abb3438dbed074b52dfbc93b 100644 --- a/src/cal_tools/plotting.py +++ b/src/cal_tools/plotting.py @@ -14,8 +14,6 @@ from matplotlib import colors from matplotlib.patches import Patch from mpl_toolkits.axes_grid1 import AxesGrid -plt.rcParams["mpl_toolkits.legacy_colorbar"] = False - def show_overview( d, cell_to_preview, gain_to_preview, out_folder=None, infix=None diff --git a/src/xfel_calibrate/calibrate.py b/src/xfel_calibrate/calibrate.py index e83c60a41de3796751e0b7fd7873cfd650f1468e..6f4138507830a831da0ca78adbcb10423784a65e 100755 --- a/src/xfel_calibrate/calibrate.py +++ b/src/xfel_calibrate/calibrate.py @@ -1,44 +1,42 @@ #!/usr/bin/env python -import argparse -import inspect import json import locale import math import os -import pprint import re +import shlex import shutil import stat -import string import sys import textwrap import warnings from datetime import datetime from pathlib import Path -from subprocess import DEVNULL, check_output, check_call, call +from subprocess import DEVNULL, call, check_call, check_output from typing import List, Union import nbformat import numpy as np +import yaml from jinja2 import Template from nbparameterise import extract_parameters, parameter_values, replace_definitions -import yaml import cal_tools.tools from .finalize import tex_escape -from .notebooks import notebooks +from .nb_args import ( + consolize_name, + first_markdown_cell, + get_notebook_function, + parse_argv_and_load_nb, + set_figure_format, +) from .settings import ( default_report_path, finalize_time_limit, - free_nodes_cmd, launcher_command, - max_reserved, - preempt_nodes_cmd, python_path, - reservation, - reservation_char, sprof, temp_path, try_report_to_output, @@ -47,131 +45,6 @@ from .settings import ( PKG_DIR = os.path.dirname(os.path.abspath(__file__)) -# Add a class combining raw description formatting with -# Metavariable default outputs -class RawTypeFormatter(argparse.RawDescriptionHelpFormatter, - argparse.MetavarTypeHelpFormatter, - argparse.ArgumentDefaultsHelpFormatter): - pass - - -# The argument parser for calibrate.py, will be extended depending -# on 
the options given. - -def make_initial_parser(**kwargs): - parser = argparse.ArgumentParser( - description="Main entry point for offline calibration", - formatter_class=RawTypeFormatter, - **kwargs - ) - - parser.add_argument('detector', metavar='DETECTOR', type=str, - help='The detector to calibrate: ' + ", ".join(notebooks)) - - parser.add_argument('type', metavar='TYPE', type=str, - help='Type of calibration.') - - parser.add_argument('--no-cluster-job', - action="store_true", - default=False, - help="Do not run as a cluster job") - - parser.add_argument('--prepare-only', action="store_true", - help="Prepare notebooks but don't run them") - - parser.add_argument('--report-to', type=str, - help='Filename (and optionally path) for output' - ' report') - - parser.add_argument('--concurrency-par', type=str, - help='Name of concurrency parameter.' - 'If not given, it is taken from configuration.') - - parser.add_argument('--constants-from', type=str, help=( - "Path to a calibration-metadata.yml file. If given, " - "retrieved-constants will be copied to use for a new correction." - )) - - parser.add_argument('--priority', type=int, default=2, - help="Priority of batch jobs. If priority<=1, reserved" - " nodes become available.") - - parser.add_argument('--vector-figs', action="store_true", default=False, - help="Use vector graphics for figures in the report.") - - parser.add_argument('--slurm-mem', type=int, default=500, - help="Requested node RAM in GB") - - parser.add_argument('--slurm-name', type=str, default='xfel_calibrate', - help='Name of slurm job') - - parser.add_argument('--slurm-scheduling', type=int, default=0, - help='Change scheduling priority for a slurm job ' - '+- 2147483645 (negative value increases ' - 'priority)') - - parser.add_argument('--request-time', type=str, default='Now', - help='Time of request to process notebook. Iso format') - - parser.add_argument_group('required arguments') - - parser.add_argument('--slurm-partition', type=str, default="", - help="Submit jobs in this Slurm partition") - - parser.add_argument('--reservation', type=str, default="", - help="Submit jobs in this Slurm reservation, " - "overriding --slurm-partition if both are set") - - return parser - - -# Helper functions for parser extensions - -def make_intelli_list(ltype): - """ Parses a list from range and comma expressions. 
- - An expression of the form "1-5,6" will be parsed into the following - list: [1,2,3,4,6] - - """ - class IntelliListAction(argparse.Action): - element_type = ltype - - def __init__(self, *args, **kwargs): - super(IntelliListAction, self).__init__(*args, **kwargs) - - def __call__(self, parser, namespace, values, option_string=None): - parsed_values = [] - values = ",".join(values) - if isinstance(values, str): - for rcomp in values.split(","): - if "-" in rcomp: - start, end = rcomp.split("-") - parsed_values += list(range(int(start), int(end))) - else: - parsed_values += [int(rcomp)] - elif isinstance(values, (list, tuple)): - parsed_values = values - else: - parsed_values = [values, ] - - parsed_values = [self.element_type(p) for p in parsed_values] - print("Parsed input {} to {}".format(values, parsed_values)) - setattr(namespace, self.dest, parsed_values) - - return IntelliListAction - - -def consolize_name(name): - """ Names of console parameters don't have underscores """ - return name.replace("_", "-") - - -def deconsolize_args(args): - """ Variable names have underscores """ - return {k.replace("-", "_"): v for k, v in args.items()} - - def extract_title_author(nb): """ Tries to extract title, author from markdown. @@ -179,6 +52,8 @@ def extract_title_author(nb): """ first_md = first_markdown_cell(nb) + if first_md is None: + return None, None source = first_md["source"] title = re.findall(r'#+\s*(.*)\s*#+', source) author = re.findall( @@ -214,86 +89,6 @@ def get_python_version(python_exe): return check_output([python_exe, '--version']).decode('utf-8').split()[1] -def get_cell_n(nb, cell_type, cell_n): - """ - Return notebook cell with given number and given type - - :param nb: jupyter notebook - :param cell_type: cell type, 'code' or 'markdown' - :param cell_n: cell number (count from 0) - :return: notebook cell - """ - counter = 0 - for cell in nb.cells: - if cell.cell_type == cell_type: - if counter == cell_n: - return cell - counter += 1 - - -def first_code_cell(nb): - """ Return the first code cell of a notebook """ - return get_cell_n(nb, 'code', 0) - - -def first_markdown_cell(nb): - """ Return the first markdown cell of a notebook """ - return get_cell_n(nb, 'markdown', 0) - - -def make_epilog(nb, caltype=None): - """ Make an epilog from the notebook to add to parser help - """ - msg = "" - header_cell = first_markdown_cell(nb) - lines = header_cell.source.split("\n") - if caltype: - msg += "{:<15} {}".format(caltype, lines[0]) + "\n" - else: - msg += "{}".format(lines[0]) + "\n" - pp = pprint.PrettyPrinter(indent=(17 if caltype else 0)) - if len(lines[1:]): - plines = pp.pformat(lines[1:])[1:-1].split("\n") - for line in plines: - sline = line.replace("'", "", 1) - sline = sline.replace("', '", " " * (17 if caltype else 0), 1) - sline = sline[::-1].replace("'", "", 1)[::-1] - sline = sline.replace(" ,", " ") - if len(sline) > 1 and sline[0] == ",": - sline = sline[1:] - msg += sline + "\n" - msg += "\n" - return msg - - -def get_notebook_function(nb, fname): - flines = [] - def_found = False - indent = None - for cell in nb.cells: - if cell.cell_type == 'code': - lines = cell.source.split("\n") - for line in lines: - - if def_found: - lin = len(line) - len(line.lstrip()) - if indent is None: - if lin != 0: - indent = lin - flines.append(line) - elif lin >= indent: - flines.append(line) - else: - return "\n".join(flines) - - if re.search(r"def\s+{}\(.*\):\s*".format(fname), line) and not def_found: - # print("Found {} in line {}".format(fname, line)) - # set this to 
indent level - def_found = True - flines.append(line) - return None - - def balance_sequences(in_folder: str, run: int, sequences: List[int], sequences_per_node: int, karabo_da: Union[list, str], max_nodes: int = 8): @@ -355,189 +150,6 @@ def balance_sequences(in_folder: str, run: int, sequences: List[int], if l.size > 0] -def make_extended_parser() -> argparse.ArgumentParser: - """Create an ArgumentParser using information from the notebooks""" - - # extend the parser according to user input - # the first case is if a detector was given, but no calibration type - if len(sys.argv) == 3 and "-h" in sys.argv[2]: - detector = sys.argv[1].upper() - try: - det_notebooks = notebooks[detector] - except KeyError: - # TODO: This should really go to stderr not stdout - print("Not one of the known detectors: {}".format(notebooks.keys())) - sys.exit(1) - - msg = "Options for detector {}\n".format(detector) - msg += "*" * len(msg) + "\n\n" - - # basically, this creates help in the form of - # - # TYPE some description that is - # indented for this type. - # - # The information is extracted from the first markdown cell of - # the notebook. - for caltype, notebook in det_notebooks.items(): - if notebook.get("notebook") is None: - if notebook.get("user", {}).get("notebook") is None: - raise KeyError( - f"`{detector}` does not have a notebook path, for " - "notebooks that are stored in pycalibration set the " - "`notebook` key to a relative path or set the " - "`['user']['notebook']` key to an absolute path/path " - "pattern. Notebook configuration dictionary contains " - f"only: `{notebook}`" - "" - ) - # Everything should be indented by 17 spaces - msg += caltype.ljust(17) + "User defined notebook, arguments may vary\n" - msg += " "*17 + "User notebook expected to be at path:\n" - msg += " "*17 + notebook["user"]["notebook"] + "\n" - else: - nbpath = os.path.join(PKG_DIR, notebook["notebook"]) - nb = nbformat.read(nbpath, as_version=4) - msg += make_epilog(nb, caltype=caltype) - - return make_initial_parser(epilog=msg) - elif len(sys.argv) <= 3: - return make_initial_parser() - - # A detector and type was given. We derive the arguments - # from the corresponding notebook - args, _ = make_initial_parser(add_help=False).parse_known_args() - try: - nb_info = notebooks[args.detector.upper()][args.type.upper()] - except KeyError: - print("Not one of the known calibrations or detectors") - sys.exit(1) - - if nb_info["notebook"]: - notebook = os.path.join(PKG_DIR, nb_info["notebook"]) - else: - # If `"notebook"` entry is None, then set it to the user provided - # notebook TODO: This is a very hacky workaround, better implementation - # is not really possible with the current state of this module - user_notebook_path = nb_info["user"]["notebook"] - # Pull out the variables in the templated path string, and get values - # from command line args (e.g. 
--proposal 1234 -> {proposal}) - user_notebook_variables = [ - name for (_, name, _, _) in string.Formatter().parse(user_notebook_path) - if name is not None - ] - - user_notebook_parser = argparse.ArgumentParser(add_help=False) - for var in user_notebook_variables: - user_notebook_parser.add_argument(f"--{var}") - - user_notebook_args, _ = user_notebook_parser.parse_known_args() - - nb_info["notebook"] = user_notebook_path.format(**vars(user_notebook_args)) - notebook = nb_info["notebook"] - - cvar = nb_info.get("concurrency", {}).get("parameter", None) - - nb = nbformat.read(notebook, as_version=4) - - # extend parameters if needed - ext_func = nb_info.get("extend parms", None) - if ext_func is not None: - extend_params(nb, ext_func) - - # No extend parms function - add statically defined parameters from the - # first code cell - parser = make_initial_parser() - add_args_from_nb(nb, parser, cvar=cvar) - return parser - -def add_args_from_nb(nb, parser, cvar=None, no_required=False): - """Add argparse arguments for parameters in the first cell of a notebook. - - Uses nbparameterise to extract the parameter information. Each foo_bar - parameter gets a --foo-bar command line option. - Boolean parameters get a pair of flags like --abc and --no-abc. - - :param nb: NotebookNode object representing a loaded .ipynb file - :param parser: argparse.ArgumentParser instance - :param str cvar: Name of the concurrency parameter. - :param bool no_required: If True, none of the added options are required. - """ - parser.description = make_epilog(nb) - parms = extract_parameters(nb, lang='python') - - for p in parms: - helpstr = ("Default: %(default)s" if not p.comment - else "{}. Default: %(default)s".format(p.comment.replace("#", " ").strip())) - required = (p.comment is not None - and "required" in p.comment - and not no_required - and p.name != cvar) - - # This may be not a public API - # May require reprogramming in case of argparse updates - pars_group = parser._action_groups[2 if required else 1] - - default = p.value if (not required) else None - - if issubclass(p.type, list) or p.name == cvar: - ltype = type(p.value[0]) if issubclass(p.type, list) else p.type - range_allowed = "RANGE ALLOWED" in p.comment.upper() if p.comment else False - pars_group.add_argument(f"--{consolize_name(p.name)}", - nargs='+', - type=ltype if not range_allowed else str, - default=default, - help=helpstr, - required=required, - action=make_intelli_list(ltype) if range_allowed else None) - elif issubclass(p.type, bool): - # For a boolean, make --XYZ and --no-XYZ options. 
- alt_group = pars_group.add_mutually_exclusive_group(required=required) - alt_group.add_argument(f"--{consolize_name(p.name)}", - action="store_true", - default=default, - help=helpstr, - dest=p.name) - alt_group.add_argument(f"--no-{consolize_name(p.name)}", - action="store_false", - default=default, - help=f"Opposite of --{consolize_name(p.name)}", - dest=p.name) - else: - pars_group.add_argument(f"--{consolize_name(p.name)}", - type=p.type, - default=default, - help=helpstr, - required=required) - -def extend_params(nb, extend_func_name): - """Add parameters in the first code cell by calling a function in the notebook - """ - func = get_notebook_function(nb, extend_func_name) - - if func is None: - warnings.warn( - f"Didn't find concurrency function {extend_func_name} in notebook", - RuntimeWarning - ) - return - - # Make a temporary parser that won't exit if it sees -h or --help - pre_parser = make_initial_parser(add_help=False) - add_args_from_nb(nb, pre_parser, no_required=True) - known, _ = pre_parser.parse_known_args() - args = deconsolize_args(vars(known)) - - df = {} - exec(func, df) - f = df[extend_func_name] - sig = inspect.signature(f) - - extension = f(*[args[p] for p in sig.parameters]) - fcc = first_code_cell(nb) - fcc["source"] += "\n" + extension - - def get_par_attr(parms, key, attr, default=None): """ Return the type of parameter with name key @@ -569,19 +181,6 @@ def flatten_list(l): return '' -def set_figure_format(nb, enable_vector_format): - """Set svg format in inline backend for figures - - If parameter enable_vector_format is set to True, svg format will - be used for figures in the notebook rendering. Subsequently vector - graphics figures will be used for report. - """ - - if enable_vector_format: - cell = get_cell_n(nb, 'code', 1) - cell.source += "\n%config InlineBackend.figure_formats = ['svg']\n" - - def create_finalize_script(fmt_args, temp_path, job_list) -> str: """ Create a finalize script to produce output report @@ -600,6 +199,8 @@ def create_finalize_script(fmt_args, temp_path, job_list) -> str: run_path='{{run_path}}', out_path='{{out_path}}', version='{{version}}', + title='{{title}}', + author='{{author}}', report_to='{{report_to}}', data_path='{{in_folder}}', request_time='{{request_time}}', @@ -619,7 +220,8 @@ def create_finalize_script(fmt_args, temp_path, job_list) -> str: return f_name -def run_finalize(fmt_args, temp_path, job_list, sequential=False): +def run_finalize( + fmt_args, temp_path, job_list, sequential=False, partition="exfel"): finalize_script = create_finalize_script(fmt_args, temp_path, job_list) cmd = [] @@ -632,7 +234,7 @@ def run_finalize(fmt_args, temp_path, job_list, sequential=False): '--open-mode=append', # So we can see if it's preempted & requeued '--job-name', 'xfel-cal-finalize', '--time', finalize_time_limit, - '--partition', 'exfel', + '--partition', partition, "--dependency=afterany:" + ":".join(str(j) for j in job_list), ] print(" ".join(cmd)) @@ -642,6 +244,7 @@ def run_finalize(fmt_args, temp_path, job_list, sequential=False): sys.executable, # Python with calibration machinery installed temp_path, finalize_script, + fmt_args['report_to'] ] output = check_output(cmd, input=b'').decode('utf8') @@ -652,7 +255,7 @@ def run_finalize(fmt_args, temp_path, job_list, sequential=False): return jobid -def save_executed_command(run_tmp_path, version): +def save_executed_command(run_tmp_path, version, argv): """ Create a file with string used to execute `xfel_calibrate` @@ -663,42 +266,28 @@ def 
save_executed_command(run_tmp_path, version): f_name = os.path.join(run_tmp_path, "run_calibrate.sh") with open(f_name, "w") as finfile: finfile.write(f'# pycalibration version: {version}\n') - finfile.write(' '.join(sys.argv)) + finfile.write(shlex.join(argv)) class SlurmOptions: def __init__( self, job_name=None, nice=None, mem=None, partition=None, reservation=None, - priority_group=2, ): self.job_name = job_name or 'xfel_calibrate' self.nice = nice self.mem = mem self.partition = partition self.reservation = reservation - self.priority_group = priority_group def get_partition_or_reservation(self) -> List[str]: """Return sbatch arguments to use a partition or reservation --reservation and --slurm-partition options have precedence. - Otherwise, if --priority is <=1, it will use a configured reservation - depending on how many nodes are currently free. + Otherwise, a default partition is used. """ if self.reservation: return ['--reservation', self.reservation] - elif self.partition: - return ['--partition', self.partition] - elif self.priority_group <= 1: - auto_resvn = reservation_char if self.priority_group <= 0 else reservation - # Use a reservation if there aren't many general nodes available to us - free = int(check_output(free_nodes_cmd, shell=True).decode('utf8')) - preempt = int(check_output(preempt_nodes_cmd, shell=True).decode('utf8')) - if free + preempt < max_reserved: - return ['--reservation', auto_resvn] - - # Fallback to using the configured partition (default: exfel) - return ['--partition', sprof] + return ['--partition', self.partition or sprof] def get_launcher_command(self, temp_path, after_ok=(), after_any=()) -> List[str]: """ @@ -717,7 +306,7 @@ class SlurmOptions: launcher_slurm += ["--job-name", self.job_name] if self.nice: - launcher_slurm += ["--nice", self.nice] + launcher_slurm.append(f"--nice={self.nice}") if self.mem: launcher_slurm.append(f"--mem={self.mem}G") @@ -770,7 +359,9 @@ def prepare_job( params = parameter_values(params, cluster_profile=cluster_profile) new_nb = replace_definitions(nb, params, execute=False, lang='python') if not show_title: - first_markdown_cell(new_nb).source = '' + title_cell = first_markdown_cell(new_nb) + if title_cell is not None: + title_cell.source = '' set_figure_format(new_nb, args["vector_figs"]) new_name = f"{nb_path.stem}__{cparm}__{suffix}.ipynb" @@ -964,28 +555,17 @@ def make_par_table(parms, run_tmp_path: str): finfile.write(textwrap.dedent(tmpl.render(p=col_type, lines=l_parms))) -def run(): +def run(argv=None): """ Run a calibration task with parser arguments """ # Ensure files are opened as UTF-8 by default, regardless of environment. 
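# Illustrative aside on the switch to shlex.join above: unlike ' '.join, it quotes
# arguments so the recorded command stays copy-paste runnable; shlex.join needs
# Python 3.8+, matching the python_requires bump elsewhere in this patch.
# The argv below is a toy example, not a real invocation.
import shlex

argv = ["xfel-calibrate", "AGIPD", "CORRECT", "--slurm-name", "my job"]
print(' '.join(argv))    # xfel-calibrate AGIPD CORRECT --slurm-name my job   (quoting lost)
print(shlex.join(argv))  # xfel-calibrate AGIPD CORRECT --slurm-name 'my job'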
locale.setlocale(locale.LC_CTYPE, ('en_US', 'UTF-8')) - parser = make_extended_parser() - args = deconsolize_args(vars(parser.parse_args())) - detector = args["detector"].upper() - caltype = args["type"].upper() - sequential = args["no_cluster_job"] + if argv is None: + argv = sys.argv - try: - nb_info = notebooks[detector][caltype] - except KeyError: - print("Not one of the known calibrations or detectors") - return 1 - - pre_notebooks = nb_info.get("pre_notebooks", []) - notebook = nb_info["notebook"] - dep_notebooks = nb_info.get("dep_notebooks", []) - concurrency = nb_info.get("concurrency", {'parameter': None}) + args, nb_details = parse_argv_and_load_nb(argv) + concurrency = nb_details.concurrency concurrency_par = args["concurrency_par"] or concurrency['parameter'] if concurrency_par == concurrency['parameter']: # Use the defaults from notebook.py to split the work into several jobs @@ -996,22 +576,14 @@ def run(): # don't use the associated settings from there. concurrency_defval = concurrency_func = None - - notebook_path = Path(PKG_DIR, notebook) - nb = nbformat.read(notebook_path, as_version=4) - - # extend parameters if needed - ext_func = nb_info.get("extend parms", None) - if ext_func is not None: - extend_params(nb, ext_func) - - parms = extract_parameters(nb, lang='python') + notebook_path = nb_details.path + nb = nb_details.contents title, author = extract_title_author(nb) version = get_pycalib_version() if not title: - title = "{} {} Calibration".format(detector, caltype) + title = f"{nb_details.detector} {nb_details.caltype} Calibration" if not author: author = "anonymous" if not version: @@ -1022,23 +594,29 @@ def run(): run_uuid = f"t{datetime.now().strftime('%y%m%d_%H%M%S')}" # check if concurrency parameter is given and we run concurrently - if not any(p.name == "parameter" for p in parms) and concurrency_par is not None: + if concurrency_par is not None and not any( + p.name == concurrency_par for p in nb_details.default_params + ): msg = f"Notebook cannot be run concurrently: no {concurrency_par} parameter" warnings.warn(msg, RuntimeWarning) # If not explicitly specified, use a new profile for ipcluster - if args.get("cluster_profile") in {None, parser.get_default("cluster_profile")}: - args['cluster_profile'] = "slurm_prof_{}".format(run_uuid) + default_params_by_name = {p.name: p.value for p in nb_details.default_params} + if 'cluster_profile' in default_params_by_name: + if args.get("cluster_profile") == default_params_by_name["cluster_profile"]: + args['cluster_profile'] = "slurm_prof_{}".format(run_uuid) # create a temporary output directory to work in - run_tmp_path = os.path.join(temp_path, f"slurm_out_{detector}_{caltype}_{run_uuid}") + run_tmp_path = os.path.join( + temp_path, f"slurm_out_{nb_details.detector}_{nb_details.caltype}_{run_uuid}" + ) os.makedirs(run_tmp_path) # Write all input parameters to rst file to be included to final report - parms = parameter_values(parms, **args) + parms = parameter_values(nb_details.default_params, **args) make_par_table(parms, run_tmp_path) # And save the invocation of this script itself - save_executed_command(run_tmp_path, version) + save_executed_command(run_tmp_path, version, argv) # Copy the bash script which will be used to run notebooks shutil.copy2( @@ -1047,7 +625,7 @@ def run(): ) # wait on all jobs to run and then finalize the run by creating a report from the notebooks - out_path = Path(default_report_path) / detector.upper() / caltype.upper() / datetime.now().isoformat() + out_path = 
Path(default_report_path) / nb_details.detector / nb_details.caltype / datetime.now().isoformat() if try_report_to_output: if "out_folder" in args: out_path = Path(args["out_folder"]).absolute() @@ -1057,7 +635,9 @@ def run(): out_path.mkdir(parents=True, exist_ok=True) # Use given report name, falling back to notebook title - if args["report_to"] is None: + if args['skip_report']: + report_to = '' + elif args["report_to"] is None: report_to = out_path / title.replace(" ", "") print(f"report_to not specified, will use {report_to}") else: @@ -1069,11 +649,9 @@ def run(): print(f"report_to path contained no path, saving report in '{out_path}'") report_to = out_path / report_to - user_venv = nb_info.get("user", {}).get("venv") - if user_venv: - user_venv = Path(user_venv.format(**args)) - print("Using specified venv:", user_venv) - python_exe = str(user_venv / 'bin' / 'python') + if nb_details.user_venv: + print("Using specified venv:", nb_details.user_venv) + python_exe = str(nb_details.user_venv / 'bin' / 'python') else: python_exe = python_path @@ -1086,7 +664,9 @@ def run(): parm_subdict[name] = p.value metadata["pycalibration-version"] = version - metadata["report-path"] = f"{report_to}.pdf" + metadata["report-path"] = f"{report_to}.pdf" if report_to \ + else '# REPORT SKIPPED #' + metadata['reproducible'] = not args['not_reproducible'] metadata["concurrency"] = { 'parameter': concurrency_par, 'default': concurrency_defval, @@ -1107,8 +687,9 @@ def run(): metadata.save() # Record installed Python packages for reproducing the environment - with open(os.path.join(run_tmp_path, 'requirements.txt'), 'wb') as f: - check_call([python_exe, '-m', 'pip', 'freeze'], stdout=f) + if not args['skip_env_freeze']: + with open(os.path.join(run_tmp_path, 'requirements.txt'), 'wb') as f: + check_call([python_exe, '-m', 'pip', 'freeze'], stdout=f) folder = get_par_attr(parms, 'in_folder', 'value', '') @@ -1120,8 +701,7 @@ def run(): pre_jobs = [] cluster_cores = concurrency.get("cluster cores", 8) # Check if there are pre-notebooks - for pre_notebook in pre_notebooks: - pre_notebook_path = Path(PKG_DIR, pre_notebook) + for pre_notebook_path in nb_details.pre_paths: lead_nb = nbformat.read(pre_notebook_path, as_version=4) pre_jobs.append(prepare_job( run_tmp_path, lead_nb, pre_notebook_path, args, @@ -1148,7 +728,7 @@ def run(): defcval = get_par_attr(parms, concurrency_par, 'value') if defcval is not None: print(f"Concurrency parameter '{concurrency_par}' " - f"is taken from '{notebook}'") + f"is taken from '{notebook_path}'") cvals = defcval if isinstance(defcval, (list, tuple)) else [defcval] if concurrency_func: @@ -1171,6 +751,11 @@ def run(): cvals = f(*callargs) print(f"Split concurrency into {cvals}") + if cvals is None: + raise ValueError( + f"No values found for {concurrency_par} (concurrency parameter)" + ) + # get expected type cvtype = get_par_attr(parms, concurrency_par, 'type', list) cvals = remove_duplications(cvals) @@ -1188,8 +773,7 @@ def run(): # Prepare dependent notebooks (e.g. 
summaries after correction) dep_jobs = [] - for i, dep_notebook in enumerate(dep_notebooks): - dep_notebook_path = Path(PKG_DIR, dep_notebook) + for i, dep_notebook_path in enumerate(nb_details.dep_paths): dep_nb = nbformat.read(dep_notebook_path, as_version=4) dep_jobs.append(prepare_job( run_tmp_path, dep_nb, dep_notebook_path, args, @@ -1213,7 +797,7 @@ def run(): print("Files prepared, not executing now (--prepare-only option).") print("To execute the notebooks, run:") rpt_opts = '' - if user_venv is not None: + if nb_details.user_venv is not None: rpt_opts = f'--python {python_exe}' print(f" python -m xfel_calibrate.repeat {run_tmp_path} {rpt_opts}") return @@ -1221,7 +805,7 @@ def run(): submission_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S') # Launch the calibration work - if sequential: + if args["no_cluster_job"]: print("Running notebooks directly, not via Slurm...") errors = job_chain.run_direct() joblist = [] @@ -1234,24 +818,26 @@ def run(): mem=args['slurm_mem'], reservation=args['reservation'], partition=args['slurm_partition'], - priority_group=args['priority'], )) errors = False fmt_args = {'run_path': run_tmp_path, 'out_path': out_path, 'version': version, + 'title': title, + 'author': author, 'report_to': report_to, 'in_folder': folder, 'request_time': request_time, - 'submission_time': submission_time + 'submission_time': submission_time, } joblist.append(run_finalize( fmt_args=fmt_args, temp_path=run_tmp_path, job_list=joblist, - sequential=sequential, + sequential=args["no_cluster_job"], + partition=args["slurm_partition"] or "exfel", )) if any(j is not None for j in joblist): diff --git a/src/xfel_calibrate/finalize.py b/src/xfel_calibrate/finalize.py index 3683063011ea05753aad60c7cd893c969c502831..f50e717c60ca715f59156fdaea99333517624f76 100644 --- a/src/xfel_calibrate/finalize.py +++ b/src/xfel_calibrate/finalize.py @@ -393,19 +393,14 @@ def tex_escape(text): return regex.sub(lambda match: conv[match.group()], text) -def finalize(joblist, finaljob, run_path, out_path, version, report_to, data_path='Unknown', +def finalize(joblist, finaljob, run_path, out_path, version, title, author, report_to, data_path='Unknown', request_time='', submission_time=''): run_path = Path(run_path) - prepare_plots(run_path) - # Archiving files in slurm_tmp if finaljob: joblist.append(str(finaljob)) metadata = cal_tools.tools.CalibrationMetadata(out_path) - nb_info = metadata.get('notebook', {}) - title = nb_info.get('title', 'Unknown calibration') - author = nb_info.get('author', 'anonymous') job_time_fmt = 'JobID,Start,End,Elapsed,Suspended,State'.split(',') job_time_summary = get_job_info(joblist, job_time_fmt) @@ -427,13 +422,21 @@ def finalize(joblist, finaljob, run_path, out_path, version, report_to, data_pat metadata.save() metadata.save_copy(run_path) - sphinx_path = combine_report(run_path, title) - make_titlepage(sphinx_path, title, data_path, version) - make_report( - Path(sphinx_path), - run_path, - title, - author, - version, - Path(report_to), - ) + if report_to: + prepare_plots(run_path) + + sphinx_path = combine_report(run_path, title) + make_titlepage(sphinx_path, title, data_path, version) + make_report( + Path(sphinx_path), + run_path, + title, + author, + version, + Path(report_to), + ) + else: + # If there is no report location, simply copy the slurm_out_ + # directory to the output. 
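# Illustrative sketch of the no-report branch added below: an empty report_to
# string acts as the "skip report" sentinel, so the run directory is archived
# next to the output instead of being rendered to PDF. The helper name and
# paths here are invented for illustration.
from pathlib import Path
from shutil import move

def archive_or_report(report_to: str, run_path: Path, out_path: Path):
    if report_to:                      # non-empty string -> build the PDF report
        print(f"would write report to {report_to}.pdf")
    else:                              # '' (e.g. from --skip-report) -> keep the raw artifacts
        move(str(run_path), str(out_path / f"slurm_out_{run_path.name}"))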
+ slurm_archive_dir = Path(out_path) / f"slurm_out_{run_path.name}" + move(str(run_path), str(slurm_archive_dir)) diff --git a/src/xfel_calibrate/nb_args.py b/src/xfel_calibrate/nb_args.py new file mode 100644 index 0000000000000000000000000000000000000000..bdf4a7d5e7ab7bb7e5ce62c9011ffa38ceed274f --- /dev/null +++ b/src/xfel_calibrate/nb_args.py @@ -0,0 +1,486 @@ +"""Manipulating notebooks & translating parameters to command-line options +""" +import argparse +import inspect +import os.path +import pprint +import re +import string +import sys +import warnings +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import nbformat +from nbparameterise import Parameter, extract_parameters + +from .notebooks import notebooks + +PKG_DIR = os.path.dirname(os.path.abspath(__file__)) + + +# Add a class combining raw description formatting with +# Metavariable default outputs +class RawTypeFormatter(argparse.RawDescriptionHelpFormatter, + argparse.MetavarTypeHelpFormatter, + argparse.ArgumentDefaultsHelpFormatter): + pass + + +# The argument parser for calibrate.py, will be extended depending +# on the options given. + +def make_initial_parser(**kwargs): + parser = argparse.ArgumentParser( + description="Main entry point for offline calibration", + formatter_class=RawTypeFormatter, + **kwargs + ) + + parser.add_argument('detector', metavar='DETECTOR', type=str, + help='The detector to calibrate: ' + ", ".join(notebooks)) + + parser.add_argument('type', metavar='TYPE', type=str, + help='Type of calibration.') + + parser.add_argument('--no-cluster-job', + action="store_true", + default=False, + help="Do not run as a cluster job") + + parser.add_argument('--prepare-only', action="store_true", + help="Prepare notebooks but don't run them") + + parser.add_argument('--report-to', type=str, + help='Filename (and optionally path) for output' + ' report') + + parser.add_argument('--not-reproducible', action='store_true', + help='Disable checks to allow the processing result ' + 'to not be reproducible based on its metadata.') + + parser.add_argument('--skip-report', action='store_true', + help='Skip report generation in finalize step.') + + parser.add_argument('--skip-env-freeze', action='store_true', + help='Skip recording the Python environment for ' + 'reproducibility purposes, requires ' + '--not-reproducible to run.') + + parser.add_argument('--concurrency-par', type=str, + help='Name of concurrency parameter.' + 'If not given, it is taken from configuration.') + + parser.add_argument('--constants-from', type=str, help=( + "Path to a calibration-metadata.yml file. If given, " + "retrieved-constants will be copied to use for a new correction." + )) + + parser.add_argument('--vector-figs', action="store_true", default=False, + help="Use vector graphics for figures in the report.") + + parser.add_argument('--slurm-mem', type=int, default=500, + help="Requested node RAM in GB") + + parser.add_argument('--slurm-name', type=str, default='xfel_calibrate', + help='Name of slurm job') + + parser.add_argument('--slurm-scheduling', type=int, default=0, + help='Change scheduling priority for a slurm job ' + '+- 2147483645 (negative value increases ' + 'priority)') + + parser.add_argument('--request-time', type=str, default='Now', + help='Time of request to process notebook. 
Iso format') + + parser.add_argument_group('required arguments') + + parser.add_argument('--slurm-partition', type=str, default="", + help="Submit jobs in this Slurm partition") + + parser.add_argument('--reservation', type=str, default="", + help="Submit jobs in this Slurm reservation, " + "overriding --slurm-partition if both are set") + + return parser + + +# Helper functions for parser extensions + +def make_intelli_list(ltype): + """ Parses a list from range and comma expressions. + + An expression of the form "1-5,6" will be parsed into the following + list: [1,2,3,4,6] + + """ + class IntelliListAction(argparse.Action): + element_type = ltype + + def __init__(self, *args, **kwargs): + super(IntelliListAction, self).__init__(*args, **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + parsed_values = [] + values = ",".join(values) + if isinstance(values, str): + for rcomp in values.split(","): + if "-" in rcomp: + start, end = rcomp.split("-") + parsed_values += list(range(int(start), int(end))) + else: + parsed_values += [int(rcomp)] + elif isinstance(values, (list, tuple)): + parsed_values = values + else: + parsed_values = [values, ] + + parsed_values = [self.element_type(p) for p in parsed_values] + print("Parsed input {} to {}".format(values, parsed_values)) + setattr(namespace, self.dest, parsed_values) + + return IntelliListAction + + +def consolize_name(name): + """ Names of console parameters don't have underscores """ + return name.replace("_", "-") + + +def add_args_from_nb(parms, parser, cvar=None, no_required=False): + """Add argparse arguments for parameters in the first cell of a notebook. + + Uses nbparameterise to extract the parameter information. Each foo_bar + parameter gets a --foo-bar command line option. + Boolean parameters get a pair of flags like --abc and --no-abc. + + :param parms: List of nbparameterise Parameter objects + :param parser: argparse.ArgumentParser instance to modify + :param str cvar: Name of the concurrency parameter. + :param bool no_required: If True, none of the added options are required. + """ + + for p in parms: + helpstr = ("Default: %(default)s" if not p.comment + else "{}. Default: %(default)s".format(p.comment.replace("#", " ").strip())) + required = (p.comment is not None + and "required" in p.comment + and not no_required + and p.name != cvar) + + # This may be not a public API + # May require reprogramming in case of argparse updates + pars_group = parser._action_groups[2 if required else 1] + + default = p.value if (not required) else None + + if issubclass(p.type, list) or p.name == cvar: + ltype = type(p.value[0]) if issubclass(p.type, list) else p.type + range_allowed = "RANGE ALLOWED" in p.comment.upper() if p.comment else False + pars_group.add_argument(f"--{consolize_name(p.name)}", + nargs='+', + type=ltype if not range_allowed else str, + default=default, + help=helpstr, + required=required, + action=make_intelli_list(ltype) if range_allowed else None) + elif issubclass(p.type, bool): + # For a boolean, make --XYZ and --no-XYZ options. 
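+            # Both flags write to the same dest (p.name), so the last one given on
+            # the command line wins.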
+ alt_group = pars_group.add_mutually_exclusive_group(required=required) + alt_group.add_argument(f"--{consolize_name(p.name)}", + action="store_true", + default=default, + help=helpstr, + dest=p.name) + alt_group.add_argument(f"--no-{consolize_name(p.name)}", + action="store_false", + default=default, + help=f"Opposite of --{consolize_name(p.name)}", + dest=p.name) + else: + pars_group.add_argument(f"--{consolize_name(p.name)}", + type=p.type, + default=default, + help=helpstr, + required=required) + +def get_cell_n(nb, cell_type, cell_n): + """ + Return notebook cell with given number and given type + + :param nb: jupyter notebook + :param cell_type: cell type, 'code' or 'markdown' + :param cell_n: cell number (count from 0) + :return: notebook cell + """ + counter = 0 + for cell in nb.cells: + if cell.cell_type == cell_type: + if counter == cell_n: + return cell + counter += 1 + + +def first_code_cell(nb): + """ Return the first code cell of a notebook """ + return get_cell_n(nb, 'code', 0) + + +def first_markdown_cell(nb): + """ Return the first markdown cell of a notebook """ + return get_cell_n(nb, 'markdown', 0) + + +def set_figure_format(nb, enable_vector_format): + """Set svg format in inline backend for figures + + If parameter enable_vector_format is set to True, svg format will + be used for figures in the notebook rendering. Subsequently vector + graphics figures will be used for report. + """ + + if enable_vector_format: + cell = get_cell_n(nb, 'code', 1) + cell.source += "\n%config InlineBackend.figure_formats = ['svg']\n" + + +def get_notebook_function(nb, fname): + flines = [] + def_found = False + indent = None + for cell in nb.cells: + if cell.cell_type == 'code': + lines = cell.source.split("\n") + for line in lines: + + if def_found: + lin = len(line) - len(line.lstrip()) + if indent is None: + if lin != 0: + indent = lin + flines.append(line) + elif lin >= indent: + flines.append(line) + else: + return "\n".join(flines) + + if re.search(r"def\s+{}\(.*\):\s*".format(fname), line) and not def_found: + # print("Found {} in line {}".format(fname, line)) + # set this to indent level + def_found = True + flines.append(line) + return None + + +def make_epilog(nb, caltype=None): + """ Make an epilog from the notebook to add to parser help + """ + msg = "" + header_cell = first_markdown_cell(nb) + lines = header_cell.source.split("\n") if header_cell is not None else [''] + if caltype: + msg += "{:<15} {}".format(caltype, lines[0]) + "\n" + else: + msg += "{}".format(lines[0]) + "\n" + pp = pprint.PrettyPrinter(indent=(17 if caltype else 0)) + if len(lines[1:]): + plines = pp.pformat(lines[1:])[1:-1].split("\n") + for line in plines: + sline = line.replace("'", "", 1) + sline = sline.replace("', '", " " * (17 if caltype else 0), 1) + sline = sline[::-1].replace("'", "", 1)[::-1] + sline = sline.replace(" ,", " ") + if len(sline) > 1 and sline[0] == ",": + sline = sline[1:] + msg += sline + "\n" + msg += "\n" + return msg + + +def deconsolize_args(args): + """ Variable names have underscores """ + return {k.replace("-", "_"): v for k, v in args.items()} + + +def extend_params(nb, extend_func_name, argv): + """Add parameters in the first code cell by calling a function in the notebook + """ + func = get_notebook_function(nb, extend_func_name) + + if func is None: + warnings.warn( + f"Didn't find concurrency function {extend_func_name} in notebook", + RuntimeWarning + ) + return + + # Make a temporary parser that won't exit if it sees -h or --help + pre_parser = 
make_initial_parser(add_help=False)
+    add_args_from_nb(extract_parameters(nb, lang='python'), pre_parser, no_required=True)
+    known, _ = pre_parser.parse_known_args(argv[1:])
+    args = deconsolize_args(vars(known))
+
+    df = {}
+    exec(func, df)
+    f = df[extend_func_name]
+    sig = inspect.signature(f)
+
+    extension = f(*[args[p] for p in sig.parameters])
+    fcc = first_code_cell(nb)
+    fcc["source"] += "\n" + extension
+
+
+@dataclass
+class NBDetails:
+    """Details of a notebook-based workflow to run"""
+    detector: str  # e.g. AGIPD
+    caltype: str  # e.g. CORRECT
+    path: Path
+    pre_paths: List[Path]  # Notebooks to run before the main notebook
+    dep_paths: List[Path]  # Notebooks to run after the main notebook
+    contents: nbformat.NotebookNode
+    default_params: List[Parameter]
+    concurrency: Dict[str, Any]  # Contents as in notebooks.py
+    user_venv: Optional[Path]
+
+
+def parse_argv_and_load_nb(argv) -> Tuple[Dict, NBDetails]:
+    """Parse command-line arguments for xfel-calibrate to run a notebook"""
+    # extend the parser according to user input
+    # the first case is if a detector was given, but no calibration type
+    if len(argv) == 3 and "-h" in argv[2]:
+        detector = argv[1].upper()
+        try:
+            det_notebooks = notebooks[detector]
+        except KeyError:
+            # TODO: This should really go to stderr not stdout
+            print("Not one of the known detectors: {}".format(notebooks.keys()))
+            sys.exit(1)
+
+        msg = "Options for detector {}\n".format(detector)
+        msg += "*" * len(msg) + "\n\n"
+
+        # basically, this creates help in the form of
+        #
+        # TYPE some description that is
+        #      indented for this type.
+        #
+        # The information is extracted from the first markdown cell of
+        # the notebook.
+        for caltype, notebook in det_notebooks.items():
+            if notebook.get("notebook") is None:
+                if notebook.get("user", {}).get("notebook") is None:
+                    raise KeyError(
+                        f"`{detector}` does not have a notebook path, for "
+                        "notebooks that are stored in pycalibration set the "
+                        "`notebook` key to a relative path or set the "
+                        "`['user']['notebook']` key to an absolute path/path "
+                        "pattern. Notebook configuration dictionary contains "
+                        f"only: `{notebook}`"
+                        ""
+                    )
+                # Everything should be indented by 17 spaces
+                msg += caltype.ljust(17) + "User defined notebook, arguments may vary\n"
+                msg += " "*17 + "User notebook expected to be at path:\n"
+                msg += " "*17 + notebook["user"]["notebook"] + "\n"
+            else:
+                nbpath = os.path.join(PKG_DIR, notebook["notebook"])
+                nb = nbformat.read(nbpath, as_version=4)
+                msg += make_epilog(nb, caltype=caltype)
+
+        make_initial_parser(epilog=msg).parse_args(argv[1:])
+        sys.exit()  # parse_args should already exit for --help
+    elif len(argv) <= 3:
+        make_initial_parser().parse_args(argv[1:])
+        sys.exit()  # parse_args should already exit - not enough args
+
+    # A detector and type was given. We derive the arguments
+    # from the corresponding notebook
+    args, _ = make_initial_parser(add_help=False).parse_known_args(argv[1:])
+    try:
+        nb_info = notebooks[args.detector.upper()][args.type.upper()]
+    except KeyError:
+        print("Not one of the known calibrations or detectors")
+        sys.exit(1)
+
+    # Pick out any arguments that may prevent reproducibility from
+    # working, sorted alphabetically and converted back to their
+    # canonical representation.
+    not_reproducible_args = sorted(
+        ('--' + x.replace('_', '-')
+         for x in ['skip_env_freeze']
+         if getattr(args, x))
+    )
+
+    # If any of these arguments are set, present a warning.
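+    # (At present only --skip-env-freeze falls into this category.)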
+ if not_reproducible_args: + print('WARNING: One or more command line arguments ({}) may prevent ' + 'this specific correction result from being reproducible based ' + 'on its metadata. It may not be possible to restore identical ' + 'output data files when they have been deleted or lost. Please ' + 'ensure that the data retention policy of the chosen storage ' + 'location is sufficient for your ' + 'needs.'.format(', '.join(not_reproducible_args))) + + if not args.not_reproducible: + # If not explicitly specified that reproducibility may be + # broken, remind the user and exit. + print('To proceed, you can explicitly allow reproducibility to ' + 'be broken by adding --not-reproducible') + sys.exit(1) + + if nb_info["notebook"]: + notebook = os.path.join(PKG_DIR, nb_info["notebook"]) + else: + # If `"notebook"` entry is None, then set it to the user provided + # notebook TODO: This is a very hacky workaround, better implementation + # is not really possible with the current state of this module + user_notebook_path = nb_info["user"]["notebook"] + # Pull out the variables in the templated path string, and get values + # from command line args (e.g. --proposal 1234 -> {proposal}) + user_notebook_variables = [ + name for (_, name, _, _) in string.Formatter().parse(user_notebook_path) + if name is not None + ] + + user_notebook_parser = argparse.ArgumentParser(add_help=False) + for var in user_notebook_variables: + user_notebook_parser.add_argument(f"--{var}") + + user_notebook_args, _ = user_notebook_parser.parse_known_args(argv[1:]) + + notebook = user_notebook_path.format(**vars(user_notebook_args)) + + concurrency = nb_info.get("concurrency", {'parameter': None}) + + nb = nbformat.read(notebook, as_version=4) + + # extend parameters if needed + ext_func = nb_info.get("extend parms", None) + if ext_func is not None: + extend_params(nb, ext_func, argv) + + default_params = extract_parameters(nb, lang='python') + + parser = make_initial_parser() + parser.description = make_epilog(nb) + add_args_from_nb(default_params, parser, cvar=concurrency['parameter']) + + arg_dict = deconsolize_args(vars(parser.parse_args(argv[1:]))) + + user_venv = nb_info.get("user", {}).get("venv") + if user_venv is not None: + user_venv = Path(user_venv.format(**arg_dict)) + + return arg_dict, NBDetails( + detector=args.detector.upper(), + caltype=args.type.upper(), + path=Path(notebook), + pre_paths=[Path(PKG_DIR, p) for p in nb_info.get('pre_notebooks', [])], + dep_paths=[Path(PKG_DIR, p) for p in nb_info.get('dep_notebooks', [])], + contents=nb, + default_params=default_params, + concurrency=concurrency, + user_venv=user_venv, + ) diff --git a/src/xfel_calibrate/notebooks.py b/src/xfel_calibrate/notebooks.py index de629fc6f5cddf3665c3fcdcdde63e628afcf289..47085e19aebbb6b8c2bd83db6a2e501268f4685d 100644 --- a/src/xfel_calibrate/notebooks.py +++ b/src/xfel_calibrate/notebooks.py @@ -241,9 +241,9 @@ notebooks = { }, "REMI": { "CORRECT": { - "notebook": None, + "notebook": "notebooks/REMI/REMI_Digitize_and_Transform.ipynb", "user": { - "notebook": "/gpfs/exfel/exp/SQS/202121/p002926/usr/calibration/notebooks/correct.ipynb", + "notebook": None, "venv": "/gpfs/exfel/sw/software/exfel_environments/sqs-remi-preview" }, "concurrency": { diff --git a/src/xfel_calibrate/repeat.py b/src/xfel_calibrate/repeat.py index 95d41b1714c1ade7aba3c09b935a8fbe8a69ece0..12d4b30057022f657d9d44faaba7a121d5d8dcb0 100644 --- a/src/xfel_calibrate/repeat.py +++ b/src/xfel_calibrate/repeat.py @@ -93,6 +93,7 @@ def main(argv=None): 
temp_path=working_dir, job_list=joblist, sequential=args.no_cluster_job, + partition=args.slurm_partition )) if any(j is not None for j in joblist): diff --git a/src/xfel_calibrate/settings.py b/src/xfel_calibrate/settings.py index 989cd1022a47f85cb4d72af7b05754f416973ec8..15067c0a6ff9e211f1ca3d16280edc9fa524f2d3 100644 --- a/src/xfel_calibrate/settings.py +++ b/src/xfel_calibrate/settings.py @@ -21,11 +21,6 @@ sprof = os.environ.get("XFELCALSLURM", "exfel") # TODO: https://git.xfel.eu/git launcher_command = "sbatch -t 24:00:00 --requeue --output {temp_path}/slurm-%j.out --parsable" free_nodes_cmd = "sinfo -p exfel -t idle -N --noheader | wc -l" preempt_nodes_cmd = "squeue -p all,grid --noheader | grep max-exfl | egrep -v 'max-exfl18[3-8]|max-exfl100|max-exflg' | wc -l" -max_reserved = 8 -# "xcal" reservation value removed as ITDM -# is giving xcal priority by default. -reservation = "" -reservation_char = "darks" # Time limit for the finalize job (creates PDF report & moves files) finalize_time_limit = "30:00" diff --git a/tests/conftest.py b/tests/conftest.py index f8186a5934821cc917c19aa710149b5d424a3f44..a7cfeb0f1e81716dc083aeed46d6a3e45bb6179b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,5 @@ +import socket +from functools import lru_cache from pathlib import Path import pytest @@ -14,12 +16,33 @@ def pytest_addoption(parser): def pytest_configure(config): config.addinivalue_line( - "markers", "requires_gpfs(): marks skips for tests that require GPFS access" + "markers", + "requires_gpfs(): marks skips for tests that require GPFS access", ) + config.addinivalue_line( + "markers", + "requires_caldb(): marks skips for tests that require calDB access", + ) + + +@lru_cache() +def server_reachable(server: str = "max-exfl017"): + reachable = True + + try: + socket.getaddrinfo(server, port=80) + except socket.gaierror: + reachable = False + + return reachable + def pytest_runtest_setup(item): if list(item.iter_markers(name="requires_gpfs")) and ( not Path("/gpfs").is_dir() or item.config.getoption("--no-gpfs") ): pytest.skip("gpfs not available") + + if list(item.iter_markers(name="requires_caldb")) and not server_reachable(): + pytest.skip("caldb not available") diff --git a/tests/test_cal_tools.py b/tests/test_cal_tools.py index a2e14e86642cf67bef19cf27901a6195f703c642..102f613b7b9f3b9bdb032f5ebe5ab218a8d1e098 100644 --- a/tests/test_cal_tools.py +++ b/tests/test_cal_tools.py @@ -7,7 +7,7 @@ import pytest import zmq from iCalibrationDB import Conditions, ConstantMetaData, Constants -from cal_tools.agipdlib import AgipdCorrections +from cal_tools.agipdlib import AgipdCorrections, CellRange from cal_tools.plotting import show_processed_modules from cal_tools.tools import ( get_dir_creation_date, @@ -288,6 +288,7 @@ def test_raise_send_to_db(_agipd_const_cond): assert str(excinfo.value) == "Resource temporarily unavailable" +@pytest.mark.requires_caldb def test_get_pdu_from_db(_agipd_const_cond): constant, condition = _agipd_const_cond @@ -336,13 +337,14 @@ def test_get_pdu_from_db(_agipd_const_cond): # TODO add a marker for accessing zmq end_point @pytest.mark.requires_gpfs +@pytest.mark.requires_caldb def test_initialize_from_db(): creation_time = datetime.strptime( "2020-01-07 13:26:48.00", "%Y-%m-%d %H:%M:%S.%f") agipd_corr = AgipdCorrections( max_cells=MEM_CELLS, - max_pulses=[0, 500, 1]) + cell_sel=CellRange([0, 500, 1], MEM_CELLS)) agipd_corr.allocate_constants( modules=[0], diff --git a/tests/test_xfel_calibrate/conftest.py b/tests/test_xfel_calibrate/conftest.py 
index 5d35fac3cbc7a174d7c3b4788086f87e5ed8fc03..7533d2f278397fc234f2c780aebdfc217622ade2 100644 --- a/tests/test_xfel_calibrate/conftest.py +++ b/tests/test_xfel_calibrate/conftest.py @@ -14,6 +14,7 @@ class FakeProcessCalibrate(FakeProcess): """Class used to create a fake process which will mock calls to processes expected to be called by pycalibration """ + def __init__(self): """Sets up fake process for use with xfel calibrate @@ -34,8 +35,6 @@ class FakeProcessCalibrate(FakeProcess): """ super().__init__() # Fake calls to slurm - self.register_subprocess(settings.free_nodes_cmd, stdout=["1"]) - self.register_subprocess(settings.preempt_nodes_cmd, stdout=["1"]) self.register_subprocess( ["sbatch", self.any()], stdout=["000000"] ) @@ -226,14 +225,13 @@ class CalibrateCall: if extra_args: self.args.extend(extra_args) - with mock.patch.object(sys, "argv", self.args): - with mock.patch.object(calibrate, "temp_path", str(tmp_path)): - calibrate.run() + with mock.patch.object(calibrate, "temp_path", str(tmp_path)): + calibrate.run(argv=self.args) - out, err = capsys.readouterr() + out, err = capsys.readouterr() - self.out: str = out - self.err: str = err + self.out: str = out + self.err: str = err Paths = NamedTuple( "Paths", diff --git a/tests/test_xfel_calibrate/test_cli.py b/tests/test_xfel_calibrate/test_cli.py index 8eeae450e17dd88225c6f080786f83208cdcdfec..1bb0c1a2794bfde1932f4b4702e63b18facdfa79 100644 --- a/tests/test_xfel_calibrate/test_cli.py +++ b/tests/test_xfel_calibrate/test_cli.py @@ -30,10 +30,9 @@ class TestBasicCalls: that the expected output is present in stdout """ - @mock.patch.object(sys, "argv", ["xfel-calibrate", "--help"]) def test_help(self, capsys): with pytest.raises(SystemExit): - calibrate.run() + calibrate.run(["xfel-calibrate", "--help"]) out, err = capsys.readouterr() @@ -43,10 +42,9 @@ class TestBasicCalls: assert err == "" - @mock.patch.object(sys, "argv", ["xfel-calibrate", "TEST", "-h"]) def test_help_detector(self, capsys): with pytest.raises(SystemExit): - calibrate.run() + calibrate.run(["xfel-calibrate", "TEST", "-h"]) out, err = capsys.readouterr() @@ -55,10 +53,9 @@ class TestBasicCalls: assert err == "" - @mock.patch.object(sys, "argv", ["xfel-calibrate", "TEST", "-h"]) def test_help_user_notebook(self, capsys): with pytest.raises(SystemExit): - calibrate.run() + calibrate.run(["xfel-calibrate", "TEST", "-h"]) out, err = capsys.readouterr() @@ -67,15 +64,13 @@ class TestBasicCalls: assert err == "" - @mock.patch.object(sys, "argv", ["xfel-calibrate", "TEST-RAISES-ERRORS", "--help"]) def test_help_bad_config(self): with pytest.raises(KeyError): - calibrate.run() + calibrate.run(["xfel-calibrate", "TEST-RAISES-ERRORS", "--help"]) - @mock.patch.object(sys, "argv", ["xfel-calibrate", "NotADetector", "beep", "-h"]) def test_unknown_detector(self, capsys): with pytest.raises(SystemExit) as exit_exception: - calibrate.run() + calibrate.run(["xfel-calibrate", "NotADetector", "beep", "-h"]) out, err = capsys.readouterr() @@ -85,10 +80,9 @@ class TestBasicCalls: assert err == "" - @mock.patch.object(sys, "argv", ["xfel-calibrate", "NotADetector", "-h"]) def test_unknown_detector_h(self, capsys): with pytest.raises(SystemExit) as exit_exception: - calibrate.run() + calibrate.run(["xfel-calibrate", "NotADetector", "-h"]) out, err = capsys.readouterr() @@ -98,10 +92,9 @@ class TestBasicCalls: assert err == "" - @mock.patch.object(sys, "argv", ["xfel-calibrate", "Tutorial", "TEST", "--help"]) def test_help_nb(self, capsys): with pytest.raises(SystemExit): - 
calibrate.run() + calibrate.run(["xfel-calibrate", "Tutorial", "TEST", "--help"]) out, err = capsys.readouterr() @@ -165,6 +158,7 @@ class TestTutorialNotebook: def test_expected_processes_called( self, + calibrate_call: CalibrateCall, fake_process_calibrate: FakeProcessCalibrate, ): process_calls = [ diff --git a/tests/test_xfel_calibrate/test_user_configs.py b/tests/test_xfel_calibrate/test_user_configs.py index 21feb55a995ec06e3deb1d71c64d6fe7abdc67cd..cbae495977b217409b64acc8c753facd63f36070 100644 --- a/tests/test_xfel_calibrate/test_user_configs.py +++ b/tests/test_xfel_calibrate/test_user_configs.py @@ -126,6 +126,7 @@ class TestUserVenv: def test_expected_processes_called( self, + calibrate_call: CalibrateCall, mock_proposal: MockProposal, fake_process_calibrate: FakeProcessCalibrate, ): diff --git a/webservice/config/serve_overview.yaml b/webservice/config/serve_overview.yaml index 0e9262ae5fde526454a7844655f3b1ee63bc58f6..a1e6b6b3ec82aa017bd37e030cddc2ea082c81e2 100644 --- a/webservice/config/serve_overview.yaml +++ b/webservice/config/serve_overview.yaml @@ -9,10 +9,7 @@ templates: css: "@format {this.webservice_dir}/templates/serve_overview.css" shell-commands: - nodes-avail-res: "sinfo --nodes=`sinfo -p exfel -t idle -N --noheader -T | grep {} | awk '{{print $6}}'` --noheader -p exfel -o %A" total-jobs: "sinfo -p exfel -o %A --noheader" - upex-jobs: "sinfo -T --noheader | awk '{print $1}'" - upex-prefix: "upex_" tail-log: "tail -5000 web.log" cat-log: "cat web.log" diff --git a/webservice/serve_overview.py b/webservice/serve_overview.py index c1e0a4f903beec6ab633494638cb13eebd59a3fe..919d48c153aa102b3dc0dfed40e5b0d5b7f8bd6a 100644 --- a/webservice/serve_overview.py +++ b/webservice/serve_overview.py @@ -12,9 +12,13 @@ from typing import Optional import yaml from jinja2 import Template -from xfel_calibrate.settings import free_nodes_cmd, preempt_nodes_cmd, reservation # noqa: E501 +from xfel_calibrate.settings import free_nodes_cmd, preempt_nodes_cmd + +try: + from .config import serve_overview as config +except: + from config import serve_overview as config -from .config import serve_overview as config class LimitedSizeDict(OrderedDict): @@ -44,10 +48,7 @@ class RequestHandler(BaseHTTPRequestHandler): global cal_config - self.nodes_avail_res_cmd = config["shell-commands"]["nodes-avail-res"] self.total_jobs_cmd = config["shell-commands"]["total-jobs"] - self.upex_jobs_cmd = config["shell-commands"]["upex-jobs"] - self.upex_prefix = config["shell-commands"]["upex-prefix"] self.tail_log_cmd = config["shell-commands"]["tail-log"] self.cat_log_cmd = config["shell-commands"]["cat-log"] self.run_candidates = config["run-candidates"] @@ -159,44 +160,13 @@ class RequestHandler(BaseHTTPRequestHandler): preempt = int(check_output( preempt_nodes_cmd, shell=True).decode('utf8')) nodes_avail_general = free + preempt - ret = check_output(self.nodes_avail_res_cmd.format(reservation), - shell=True).decode('utf8') - ures = ret.split("/") - if len(ures) == 2: - used, reserved = ures - else: - used = 0 - reserved = 0 - nodes_avail_gr = "{}/{}".format(int(reserved) - int(used), - reserved) total_jobs_running = check_output(self.total_jobs_cmd, shell=True) total_jobs_running = total_jobs_running.decode('utf8').split("/")[0] - upex_res = [r for r in - check_output(self.upex_jobs_cmd, - shell=True).decode('utf8').split() - if self.upex_prefix in r] - - upex_reservations = {} - for res in upex_res: - nodes_avail_res = check_output( - self.nodes_avail_res_cmd.format(res), - shell=True).decode('utf8') - 
used, reserved = nodes_avail_res.split("/") - nodes_avail_res = "{}/{}".format(int(reserved) - int(used), - reserved) - upex_reservations[res] = nodes_avail_res - - recommendation = "DON'T SUBMIT TO RESERVATION" - if nodes_avail_general < int(reserved) - int(used): - "CONSIDER SUBMITTING TO RESERVATION" - tmpl = Template(self.templates["maxwell-status"]) maxwell_status_r = tmpl.render(nodes_avail_general=nodes_avail_general, - nodes_avail_general_res=nodes_avail_gr, total_jobs_running=total_jobs_running, - upex_reservations=upex_reservations, - recommendation=recommendation) + ) last_n_lines = check_output(self.tail_log_cmd, shell=True).decode('utf8').split("\n") diff --git a/webservice/templates/maxwell_status.html b/webservice/templates/maxwell_status.html index 3910cddb74749ba73aeea8009790182325ff8d65..b669d01be5c109ff29cd07ec27e835be63507581 100644 --- a/webservice/templates/maxwell_status.html +++ b/webservice/templates/maxwell_status.html @@ -14,16 +14,8 @@ <dd>{{ total_jobs_running }}</dd> <dt>Nodes available on general partition</dt> <dd>{{ nodes_avail_general }}</dd> - <dt>Nodes available on general reservation</dt> - <dd>{{ nodes_avail_general_res }}</dd> - {% for res, avail in upex_reservations.items() %} - <dt>Nodes available on {{ res }}</dt> - <dd>{{ avail }}</dd> - {% endfor %} - <dt>Recommendation</dt> - <dd>{{ recommendation }}</dd> </dl> -<p><b>Note:</b> This is only a hint! Maxwell status can change quickly, so the feedback here +<p><b>Note:</b> Maxwell status can change quickly, so the feedback here can rapidly become outdated. </p> </div> diff --git a/webservice/webservice.py b/webservice/webservice.py index 4b26e03b961875ddfb2c2518e906a56addd60b00..3c1fc1b1f063c3ac28390c177bffad164512a62c 100644 --- a/webservice/webservice.py +++ b/webservice/webservice.py @@ -427,21 +427,31 @@ def update_job_db(config): except KafkaError: logging.warning("Error sending Kafka notification", exc_info=True) - mdc.update_run_api( - rid, # The result from MyMDC does not matter here. - {'cal_last_end_at': datetime.now(tz=timezone.utc).isoformat()} - ) + + if all(s.startswith('PD-') for s in statii): + # Avoid swamping myMdC with updates for jobs still pending. 
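+                # ('PD' is the Slurm state code for pending jobs.)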
+ logging.debug( + "No update for action %s, rid %s: jobs pending", + action, rid + ) + continue + msg = "\n".join(statii) msg_debug = f"Update MDC {rid}, {msg}" logging.debug(msg_debug.replace('\n', ', ')) + if action == 'CORRECT': - response = mdc.update_run_api(rid, - {'flg_cal_data_status': flg, - 'cal_pipeline_reply': msg}) + data = {'flg_cal_data_status': flg, + 'cal_pipeline_reply': msg} + if flg != 'R': + data['cal_last_end_at'] = datetime.now(tz=timezone.utc).isoformat() + response = mdc.update_run_api(rid, data) + else: # action == 'DARK' but it's dark_request data = {'dark_run': {'flg_status': dark_flags[flg], 'calcat_feedback': msg}} response = mdc.update_dark_run_api(rid, data) + if response.status_code != 200: logging.error("Failed to update MDC for action %s, rid %s", action, rid) @@ -513,13 +523,22 @@ async def run_action(job_db, cmd, mode, proposal, run, rid) -> str: rstr = stdout.decode() for r in rstr.split("\n"): - if "Submitted job:" in r: - _, jobid = r.split(":") - c.execute( - "INSERT INTO jobs VALUES (?, ?, ?, ?, 'PD', ?, ?, ?)", - (rid, jobid.strip(), proposal, run, - datetime.now().isoformat(), cmd[3], cmd[4]) - ) + if "Submitted the following SLURM jobs:" in r: + _, jobids = r.split(":") + + jobs = [] + for jobid in jobids.split(','): + jobs.append((rid, + jobid.strip(), + proposal, + run, + datetime.now().isoformat(), + cmd[3], + cmd[4]) + ) + c.executemany( + "INSERT INTO jobs VALUES (?, ?, ?, ?, 'PD', ?, ?, ?)", + jobs) job_db.commit() else: # mode == "sim" @@ -819,8 +838,8 @@ class ActionsServer: :param instrument: is the instrument :param cycle: is the facility cycle :param proposal: is the proposal id - :param runnr: is the run number in integer form, e.g. without leading - "r" + :param runnr: the run number in integer form, i.e. without leading "r" + :param priority: unused, retained for compatibility This will trigger a correction process to be launched for that run in the given cycle and proposal. @@ -892,8 +911,6 @@ class ActionsServer: thisconf["out-folder"] = out_folder thisconf["karabo-id"] = karabo_id thisconf["run"] = runnr - if priority: - thisconf["priority"] = str(priority) detectors[karabo_id] = thisconf copy_file_set = copy_file_list.difference(corr_file_list)