diff --git a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb index 6b66a1a7a951bb15a65c0faf515592765e9bb80c..962d0d6cb514b0edd8a34f8867ad557595b2e879 100644 --- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb +++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb @@ -18,7 +18,7 @@ "outputs": [], "source": [ "in_folder = \"/gpfs/exfel/exp/SPB/202131/p900230/raw\" # the folder to read data from, required\n", - "out_folder = \"/gpfs/exfel/data/scratch/esobolev/pycal_litfrm/p900230\" # the folder to output to, required\n", + "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/remove/agipd_resolve_conf\" # the folder to output to, required\n", "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n", "modules = [-1] # modules to correct, set to -1 for all, range allowed\n", "train_ids = [-1] # train IDs to correct, set to -1 for all, range allowed\n", @@ -26,31 +26,27 @@ "\n", "karabo_id = \"SPB_DET_AGIPD1M-1\" # karabo karabo_id\n", "karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators\n", - "receiver_id = \"{}CH0\" # inset for receiver devices\n", + "receiver_template = \"{}CH0\" # inset for receiver devices\n", "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n", - "h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", - "h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", - "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information\n", + "instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images\n", + "index_source_template = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", + "ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information\n", "karabo_id_control = \"SPB_IRU_AGIPD1M1\" # karabo-id for control device\n", - "karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation\n", "\n", 
"slopes_ff_from_files = \"\" # Path to locally stored SlopesFF and BadPixelsFF constants\n", "\n", "use_dir_creation_date = True # use the creation data of the input dir for database queries\n", "cal_db_interface = \"tcp://max-exfl016:8015#8045\" # the database interface to use\n", - "cal_db_timeout = 30000 # in milli seconds\n", + "cal_db_timeout = 30000 # in milliseconds\n", "creation_date_offset = \"00:00:00\" # add an offset to creation date, e.g. to get different constants\n", "\n", "use_ppu_device = '' # Device ID for a pulse picker device to only process picked trains, empty string to disable\n", "ppu_train_offset = 0 # When using the pulse picker, offset between the PPU's sequence start and actually picked train\n", "\n", - "use_litframe_device = '' # Device ID for a lit frame finder device to only process illuminated frames, empty string to disable\n", - "energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. If -1000, selection by pulse energy is disabled\n", - "\n", - "max_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", - "bias_voltage = 300 # Bias voltage\n", + "mem_cells = 0 # Number of memory cells used, set to 0 to automatically infer\n", + "bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.\n", "acq_rate = 0. 
# the detector acquisition rate, use 0 to try to auto-determine\n", - "gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine\n", + "gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.\n", "gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)\n", "photon_energy = 9.2 # photon energy in keV\n", "overwrite = True # set to True if existing data should be overwritten\n", @@ -80,7 +76,7 @@ "zero_nans = False # set NaN values in corrected data to 0\n", "zero_orange = False # set to 0 very negative and very large values in corrected data\n", "blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr\n", - "corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted\n", + "corr_asic_diag = False # if set, diagonal drop offs on ASICs are corrected\n", "force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold\n", "force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold\n", "mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold\n", @@ -89,17 +85,20 @@ "mask_zero_std = False # Mask pixels with zero standard deviation across train\n", "low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain\n", "\n", + "use_litframe_device = '' # Device ID for a lit frame finder device to only process illuminated frames, empty string to disable\n", + "energy_threshold = -1000 # The low limit for the energy (uJ) exposed by frames subject to processing. 
If -1000, selection by pulse energy is disabled\n", + "\n", "# Plotting parameters\n", "skip_plots = False # exit after writing corrected files and metadata\n", "cell_id_preview = 1 # cell Id used for preview in single-shot plots\n", "\n", "# Paralellization parameters\n", - "chunk_size = 1000 # Size of chunk for image-weise correction\n", - "chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.\n", + "chunk_size = 1000 # Size of chunk for image-wise correction\n", "n_cores_correct = 16 # Number of chunks to be processed in parallel\n", "n_cores_files = 4 # Number of files to be processed in parallel\n", - "sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n", - "max_nodes = 8 # Maximum number of Slurm jobs to split correction work into\n", + "sequences_per_node = 2 # number of sequence files per cluster node if run as SLURM job, set to 0 to not run SLURM parallel\n", + "max_nodes = 8 # Maximum number of SLURM jobs to split correction work into\n", + "\n", "\n", "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):\n", " from xfel_calibrate.calibrate import balance_sequences as bs\n", @@ -131,7 +130,7 @@ "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import yaml\n", - "from extra_data import RunDirectory, stack_detector_data, by_id\n", + "from extra_data import H5File, RunDirectory, stack_detector_data, by_id\n", "from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry\n", "from matplotlib import cm as colormap\n", "from matplotlib.colors import LogNorm\n", @@ -150,13 +149,9 @@ "from cal_tools import agipdalgs as calgs\n", "from cal_tools.agipdlib import (\n", " AgipdCorrections,\n", + " AgipdCtrl,\n", " CellRange,\n", " LitFrameSelection,\n", - " get_acq_rate,\n", - " get_gain_mode,\n", - " get_integration_time,\n", - " get_gain_setting,\n", - " get_num_cells,\n", ")\n", "from 
cal_tools.ana_tools import get_range\n", "from cal_tools.enums import AgipdGainMode, BadPixels\n", @@ -174,7 +169,8 @@ "outputs": [], "source": [ "in_folder = Path(in_folder)\n", - "out_folder = Path(out_folder)" + "out_folder = Path(out_folder)\n", + "run_folder = in_folder / f'r{run:04d}'" ] }, { @@ -240,12 +236,11 @@ "if sequences == [-1]:\n", " sequences = None\n", "\n", - "control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n", - "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n", - "h5path = h5path.format(karabo_id, receiver_id)\n", - "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n", + "dc = RunDirectory(run_folder)\n", "\n", - "print(f'Path to control file {control_fn}')" + "ctrl_src = ctrl_source_template.format(karabo_id_control)\n", + "instrument_src = instrument_source_template.format(karabo_id, receiver_template)\n", + "index_src = index_source_template.format(karabo_id, receiver_template)" ] }, { @@ -291,7 +286,6 @@ "source": [ "if use_ppu_device:\n", " # Obtain trains to process if using a pulse picker device.\n", - " dc = RunDirectory(in_folder / f'r{run:04d}')\n", "\n", " # Will throw an uncaught exception if the device is wrong.\n", " seq_start = dc[use_ppu_device, 'trainTrigger.sequenceStart.value'].ndarray()\n", @@ -301,21 +295,6 @@ " train_ids = np.unique(seq_start)[1:] + ppu_train_offset\n", "\n", " print(f'PPU device {use_ppu_device} triggered for {len(train_ids)} train(s)')\n", - " \n", - " # Since we got the DataCollection already, narrow down the files we open.\n", - " # This hardcodes the receiver_id and path_template parameters currently, but this\n", - " # will disappear with moving the entire notebook to EXtra-data.\n", - " subdc = dc.select_trains(by_id[train_ids]).select(f'{karabo_id}/DET/*CH0:xtdf')\n", - " subseq = {int(f.filename[-8:-3]) for f in subdc.files}\n", - " \n", - " if sequences is None:\n", - " # All sequences were meant to be processed by this job, so take the 
entire\n", - " # subset of sequences.\n", - " sequences = sorted(subseq)\n", - " else:\n", - " # If explicit sequences were specified (e.g. due to job balancing by xfel-calibrate)\n", - " # only work on the intersection between that and what the PPU device offers.\n", - " sequences = sorted(set(sequences) & subseq)\n", "\n", "elif train_ids != [-1]:\n", " # Specific trains passed by parameter, convert to ndarray.\n", @@ -332,9 +311,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# set everything up filewise\n", @@ -368,25 +345,18 @@ "metadata": {}, "outputs": [], "source": [ - "filename = file_list[0]\n", - "channel = int(re.findall(r\".*-AGIPD([0-9]+)-.*\", filename)[0])\n", - "\n", - "# Evaluate number of memory cells\n", - "mem_cells = get_num_cells(filename, karabo_id, channel)\n", - "if mem_cells is None:\n", - " raise ValueError(f\"No raw images found in {filename}\")\n", - "\n", - "mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db\n", - "max_cells = mem_cells if max_cells == 0 else max_cells\n", - "\n", - "fast_paths = (filename, karabo_id, channel)\n", - "slow_paths = (control_fn, karabo_id_control)\n", - "\n", - "# Evaluate aquisition rate\n", - "if acq_rate == 0:\n", - " acq_rate = get_acq_rate(fast_paths, slow_paths)\n", - "\n", - "print(f\"Maximum memory cells to calibrate: {max_cells}\")" + "first_mod_channel = sorted(modules)[0]\n", + "\n", + "instrument_src_mod = [\n", + " s for s in list(dc.all_sources) if f\"{first_mod_channel}CH\" in s][0]\n", + "mod_channel = int(re.findall(rf\".*{first_mod_channel}CH([0-9]+):.*\", instrument_src_mod)[0])\n", + "\n", + "agipd_cond = AgipdCtrl(\n", + " run_dc=dc,\n", + " image_src=instrument_src_mod,\n", + " ctrl_src=ctrl_src,\n", + " raise_error=False, # to be able to process very old data without gain_setting value\n", + ")" ] }, { @@ -395,24 +365,29 @@ "metadata": {}, "outputs": [], "source": [ - "if 
use_litframe_device:\n", - " # check run for the AgipdLitFrameFinder device\n", - " try: dc\n", - " except NameError: dc = RunDirectory(in_folder / f'r{run:04d}')\n", + "# Evaluate creation time\n", + "creation_time = None\n", + "if use_dir_creation_date:\n", + " creation_time = cal_tools.tools.get_dir_creation_date(str(in_folder), run)\n", + " offset = parser.parse(creation_date_offset)\n", + " delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)\n", + " creation_time += delta\n", "\n", - " if use_litframe_device + ':output' in dc.instrument_sources:\n", - " # Use selection provided by the AgipdLitFrameFinder (if the device is recorded)\n", - " cell_sel = LitFrameSelection(use_litframe_device, dc, train_ids, max_pulses, energy_threshold)\n", - " train_ids = cell_sel.train_ids\n", - " else:\n", - " # Use range selection (if the device is not recorded)\n", - " print(f\"WARNING: LitFrameFinder {use_litframe_device} device is not found.\")\n", - " cell_sel = CellRange(max_pulses, max_cells=max_cells)\n", + "if acq_rate == 0.:\n", + " acq_rate = agipd_cond.get_acq_rate()\n", + "if mem_cells == 0.:\n", + " mem_cells = agipd_cond.get_num_cells()\n", + "# TODO: look for alternative for passing creation_time\n", + "if gain_setting == -1:\n", + " gain_setting = agipd_cond.get_gain_setting(creation_time)\n", + "if bias_voltage == 0.:\n", + " bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)\n", + "if integration_time == -1:\n", + " integration_time = agipd_cond.get_integration_time()\n", + "if gain_mode == -1:\n", + " gain_mode = agipd_cond.get_gain_mode()\n", "else:\n", - " # Use range selection\n", - " cell_sel = CellRange(max_pulses, max_cells=max_cells)\n", - "\n", - "print(cell_sel.msg())" + " gain_mode = AgipdGainMode(gain_mode)" ] }, { @@ -421,37 +396,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Evaluate creation time\n", - "creation_time = None\n", - "if use_dir_creation_date:\n", - " creation_time = 
cal_tools.tools.get_dir_creation_date(str(in_folder), run)\n", - " offset = parser.parse(creation_date_offset)\n", - " delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)\n", - " creation_time += delta\n", + "if mem_cells is None:\n", + " raise ValueError(f\"No raw images found in {filename}\")\n", "\n", - "# Evaluate gain setting\n", - "if gain_setting == 0.1:\n", - " if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):\n", - " print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n", - " gain_setting = None\n", - " else:\n", - " try:\n", - " gain_setting = get_gain_setting(str(control_fn), h5path_ctrl)\n", - " except Exception as e:\n", - " print(f'ERROR: while reading gain setting from: \\n{control_fn}')\n", - " print(e)\n", - " print(\"Set gain setting to 0\")\n", - " gain_setting = 0\n", - "\n", - "# Evaluate gain mode (operation mode)\n", - "if gain_mode < 0:\n", - " gain_mode = get_gain_mode(control_fn, h5path_ctrl)\n", - "else:\n", - " gain_mode = AgipdGainMode(gain_mode)\n", + "mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db\n", "\n", - "# Evaluate integration time\n", - "if integration_time < 0:\n", - " integration_time = get_integration_time(control_fn, h5path_ctrl)" + "print(f\"Maximum memory cells to calibrate: {mem_cells}\")" ] }, { @@ -484,6 +434,30 @@ " corr_bools[to_disable] = False" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if use_litframe_device:\n", + " # check run for the AgipdLitFrameFinder device\n", + "\n", + " if use_litframe_device + ':output' in dc.instrument_sources:\n", + " # Use selection provided by the AgipdLitFrameFinder (if the device is recorded)\n", + " cell_sel = LitFrameSelection(use_litframe_device, dc, train_ids, max_pulses, energy_threshold)\n", + " train_ids = cell_sel.train_ids\n", + " else:\n", + " # Use range selection (if the device is not recorded)\n", + " print(f\"WARNING: 
LitFrameFinder {use_litframe_device} device is not found.\")\n", + " cell_sel = CellRange(max_pulses, max_cells=mem_cells)\n", + "else:\n", + " # Use range selection\n", + " cell_sel = CellRange(max_pulses, max_cells=mem_cells)\n", + "\n", + "print(cell_sel.msg())" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -498,10 +472,10 @@ "outputs": [], "source": [ "agipd_corr = AgipdCorrections(\n", - " max_cells,\n", + " mem_cells,\n", " cell_sel,\n", - " h5_data_path=h5path,\n", - " h5_index_path=h5path_idx,\n", + " h5_data_path=instrument_src,\n", + " h5_index_path=index_src,\n", " corr_bools=corr_bools,\n", " gain_mode=gain_mode,\n", " comp_threads=os.cpu_count() // n_cores_files,\n", @@ -591,7 +565,7 @@ "outputs": [], "source": [ "# allocate memory for images and hists\n", - "n_images_max = max_cells * 256\n", + "n_images_max = mem_cells * 256\n", "data_shape = (n_images_max, 512, 128)\n", "agipd_corr.allocate_images(data_shape, n_cores_files)" ] @@ -621,6 +595,8 @@ "\n", " Yields (file data slot, start index, stop index)\n", " \"\"\"\n", + " \n", + " \n", " for i_proc, n_img in enumerate(img_counts):\n", " n_chunks = math.ceil(n_img / chunk_size)\n", " for i in range(n_chunks):\n", @@ -650,8 +626,11 @@ " for file_batch in batches(file_list, n_cores_files):\n", " # TODO: Move some printed output to logging or similar\n", " print(f\"Processing next {len(file_batch)} files\")\n", - " img_counts = pool.starmap(agipd_corr.read_file, zip(range(len(file_batch)), file_batch,\n", - " [not common_mode]*len(file_batch)))\n", + " step_timer.start()\n", + " img_counts = pool.starmap(\n", + " agipd_corr.read_file,\n", + " zip(range(len(file_batch)), file_batch, [not common_mode]*len(file_batch))\n", + " )\n", " step_timer.done_step(f'Loading data from files')\n", "\n", " if img_counts == 0:\n", @@ -661,9 +640,12 @@ "\n", " if mask_zero_std:\n", " # Evaluate zero-data-std mask\n", - " pool.starmap(agipd_corr.mask_zero_std, itertools.product(\n", - " range(len(file_batch)), 
np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)\n", - " ))\n", + " pool.starmap(\n", + " agipd_corr.mask_zero_std, itertools.product(\n", + " range(len(file_batch)),\n", + " np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)\n", + " )\n", + " )\n", " step_timer.done_step('Mask 0 std')\n", "\n", " # Perform offset image-wise correction\n", @@ -676,6 +658,8 @@ " step_timer.done_step(\"Base-line shift correction\")\n", "\n", " if common_mode:\n", + " # Common mode correction is enabled.\n", + " # Cell selection is only activated after common mode correction.\n", " # Perform cross-file correction parallel over asics\n", " pool.starmap(agipd_corr.cm_correction, itertools.product(\n", " range(len(file_batch)), range(16) # 16 ASICs per module\n", @@ -683,11 +667,12 @@ " step_timer.done_step(\"Common-mode correction\")\n", "\n", " img_counts = pool.map(agipd_corr.apply_selected_pulses, range(len(file_batch)))\n", - " step_timer.done_step(\"Applying selected pulses after common mode correction\")\n", + " step_timer.done_step(\"Applying selected cells after common mode correction\")\n", "\n", " # Perform image-wise correction\n", - " pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))\n", - " step_timer.done_step(\"Gain corrections\")\n", + " if any(agipd_corr.pc_bools):\n", + " pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))\n", + " step_timer.done_step(\"Gain corrections\")\n", "\n", " # Save corrected data\n", " pool.starmap(agipd_corr.write_file, [\n", @@ -815,17 +800,17 @@ "metadata": {}, "outputs": [], "source": [ - "def get_trains_data(run_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):\n", + "def get_trains_data(data_folder, source, include, detector_id, tid=None, modules=16, fillvalue=None):\n", " \"\"\"Load single train for all module\n", "\n", - " :param run_folder: Path to folder with data\n", + " :param data_folder: Path to folder with data\n", " :param 
source: Data source to be loaded\n", " :param include: Inset of file name to be considered\n", " :param detector_id: The karabo id of the detector to get data for\n", " :param tid: Train Id to be loaded. First train is considered if None is given\n", " :param path: Path to find image data inside h5 file\n", " \"\"\"\n", - " run_data = RunDirectory(run_folder, include)\n", + " run_data = RunDirectory(data_folder, include)\n", " if tid is not None:\n", " tid, data = run_data.select(f'{detector_id}/DET/*', source).train_from_id(tid)\n", " else:\n", @@ -865,7 +850,7 @@ "_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)\n", "_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)\n", "_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)\n", - "_, raw = get_trains_data(f'{in_folder}/r{run:04d}/', 'image.data', include, karabo_id, tid, modules=nmods)" + "_, raw = get_trains_data(run_folder, 'image.data', include, karabo_id, tid, modules=nmods)" ] }, { @@ -1255,5 +1240,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb b/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb index dbcfe6158dd2c725c11fac2402d2acde19aab711..2cd11eb0b59cbb97f5c72c5e2ac9670e5573b95e 100644 --- a/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb +++ b/notebooks/AGIPD/AGIPD_Retrieve_Constants_Precorrection.ipynb @@ -26,9 +26,10 @@ "karabo_id = \"SPB_DET_AGIPD1M-1\" # karabo karabo_id\n", "karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators\n", "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n", - "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information\n", + "ctrl_source_template = '{}/MDL/FPGA_COMP_TEST' # path to control information\n", 
+ "instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images\n", + "receiver_template = \"{}CH0\" # inset for receiver devices\n", "karabo_id_control = \"SPB_IRU_AGIPD1M1\" # karabo-id for control device\n", - "karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation\n", "\n", "use_dir_creation_date = True # use the creation data of the input dir for database queries\n", "cal_db_interface = \"tcp://max-exfl016:8015#8045\" # the database interface to use\n", @@ -38,13 +39,11 @@ "calfile = \"\" # path to calibration file. Leave empty if all data should come from DB\n", "nodb = False # if set only file-based constants will be used\n", "mem_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", - "bias_voltage = 300\n", + "bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.\n", "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n", - "gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine\n", + "gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.\n", "gain_mode = -1 # gain mode (0: adaptive, 1-3 fixed high/med/low, -1: read from CONTROL data)\n", "photon_energy = 9.2 # photon energy in keV\n", - "max_cells_db_dark = 0 # set to a value different than 0 to use this value for dark data DB queries\n", - "max_cells_db = 0 # set to a value different than 0 to use this value for DB queries\n", "integration_time = -1 # integration time, negative values for auto-detection.\n", "\n", "# Correction Booleans\n", @@ -86,20 +85,21 @@ "metadata": {}, "outputs": [], "source": [ + "from pathlib import Path\n", "from typing import List, Tuple\n", "\n", "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import multiprocessing\n", "import numpy as np\n", + "from datetime import timedelta\n", + "from dateutil import parser\n", + "from extra_data import RunDirectory\n", "\n", "matplotlib.use(\"agg\")\n", - "import 
multiprocessing\n", - "from datetime import timedelta\n", - "from pathlib import Path\n", "\n", - "import matplotlib.pyplot as plt\n", "from cal_tools import agipdlib, tools\n", "from cal_tools.enums import AgipdGainMode\n", - "from dateutil import parser\n", "from iCalibrationDB import Conditions, Constants, Detectors" ] }, @@ -121,8 +121,6 @@ "metadata": {}, "outputs": [], "source": [ - "max_cells = mem_cells\n", - "\n", "creation_time = None\n", "if use_dir_creation_date:\n", " creation_time = tools.get_dir_creation_date(str(in_folder), run)\n", @@ -146,53 +144,21 @@ "metadata": {}, "outputs": [], "source": [ - "control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n", - "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n", - "slow_paths = (control_fn, karabo_id_control)\n", - "if gain_setting == 0.1:\n", - " if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):\n", - " print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n", - " gain_setting = None\n", - " else:\n", - " try:\n", - " gain_setting = agipdlib.get_gain_setting(str(control_fn), h5path_ctrl)\n", - " except Exception as e:\n", - " print(f'ERROR: while reading gain setting from: \\n{control_fn}')\n", - " print(e)\n", - " print(\"Set gain setting to 0\")\n", - " gain_setting = 0\n", - "\n", - "# Evaluate gain mode (operation mode)\n", - "if gain_mode < 0:\n", - " gain_mode = agipdlib.get_gain_mode(control_fn, h5path_ctrl)\n", - "else:\n", - " gain_mode = AgipdGainMode(gain_mode)\n", - "\n", - "# Evaluate integration time\n", - "if integration_time < 0:\n", - " integration_time = agipdlib.get_integration_time(control_fn, h5path_ctrl)\n", - " \n", - "print(f\"Gain setting: {gain_setting}\")\n", - "print(f\"Gain mode: {gain_mode.name}\")\n", - "print(f\"Detector in use is {karabo_id}\")\n", + "ctrl_src = ctrl_source_template.format(karabo_id_control)\n", "\n", + "print(f\"Detector in use is {karabo_id}\")\n", "\n", "# Extracting Instrument 
string\n", "instrument = karabo_id.split(\"_\")[0]\n", "# Evaluate detector instance for mapping\n", "if instrument == \"SPB\":\n", - " dinstance = \"AGIPD1M1\"\n", " nmods = 16\n", "elif instrument == \"MID\":\n", - " dinstance = \"AGIPD1M2\"\n", " nmods = 16\n", "elif instrument == \"HED\":\n", - " dinstance = \"AGIPD500K\"\n", " nmods = 8\n", "\n", "print(f\"Instrument {instrument}\")\n", - "print(f\"Detector instance {dinstance}\")\n", - "\n", "\n", "if karabo_da[0] == '-1':\n", " if modules[0] == -1:\n", @@ -202,6 +168,48 @@ " modules = [int(x[-2:]) for x in karabo_da]" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_dc = RunDirectory(in_folder / f\"r{run:04d}\")\n", + "\n", + "# set everything up filewise\n", + "mapped_files, _, _, _, _ = tools.map_modules_from_folder(\n", + " str(in_folder), run, path_template, karabo_da, sequences=[0]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read AGIPD conditions from the 1st sequence of 1st module and slow data.\n", + "instrument_src = instrument_source_template.format(karabo_id, receiver_template)\n", + "instrument_src_mod = instrument_src.format(0)\n", + "\n", + "agipd_cond = agipdlib.AgipdCtrl(\n", + " run_dc=run_dc,\n", + " image_src=None, # Not needed here, as mem_cells and acq_rate are not read in this cell.\n", + " ctrl_src=ctrl_src,\n", + ")\n", + "\n", + "if gain_setting == -1:\n", + " gain_setting = agipd_cond.get_gain_setting(creation_time)\n", + "if bias_voltage == 0.:\n", + " bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)\n", + "if integration_time == -1:\n", + " integration_time = agipd_cond.get_integration_time()\n", + "if gain_mode == -1:\n", + " gain_mode = agipd_cond.get_gain_mode()\n", + "else:\n", + " gain_mode = AgipdGainMode(gain_mode)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -216,34 +224,33 @@ "outputs": [], "source": [ "def 
retrieve_constants(\n", - " qm_files: List[Path], qm: str, karabo_da: str, idx: int\n", + " karabo_da: str, idx: int\n", ") -> Tuple[str, str, float, float, str, dict]:\n", " \"\"\"\n", " Retrieve constants for a module.\n", "\n", " :return:\n", - " qm: module virtual name i.e. Q1M1.\n", " karabo_da: karabo data aggregator.\n", " acq_rate: acquisition rate parameter.\n", - " max_cells: number of memory cells.\n", + " mem_cells: number of memory cells.\n", " mdata_dict: (DICT) dictionary with the metadata for the retrieved constants.\n", " \"\"\"\n", - " if max_cells != 0:\n", - " # either use overriding notebook parameter\n", - " local_max_cells = max_cells\n", + " if mem_cells == 0:\n", + " # either look around in sequence files\n", + " agipd_cond.image_src = instrument_src.format(idx)\n", + " local_mem_cells = agipd_cond.get_num_cells()\n", " else:\n", - " # or look around in sequence files\n", - " for f in qm_files:\n", - " local_max_cells = agipdlib.get_num_cells(f, karabo_id, idx)\n", - " if local_max_cells is not None:\n", - " break\n", + " # or use overriding notebook parameter\n", + " local_mem_cells = mem_cells\n", + "\n", " # maybe we never found this in a sequence file...\n", - " if local_max_cells is None:\n", - " raise ValueError(f\"No raw images found for {qm} for all sequences\")\n", + " if local_mem_cells is None:\n", + " raise ValueError(\n", + " \"No raw images found for \"\n", + " f\"{tools.module_index_to_qm(module_index)}({karabo_da}) for all sequences\")\n", "\n", - " if acq_rate == 0:\n", - " local_acq_rate = agipdlib.get_acq_rate(\n", - " fast_paths=(f, karabo_id, idx), slow_paths=slow_paths)\n", + " if acq_rate == 0.:\n", + " local_acq_rate = agipd_cond.get_acq_rate()\n", " else:\n", " local_acq_rate = acq_rate\n", "\n", @@ -254,7 +261,7 @@ " const_dict = agipdlib.assemble_constant_dict(\n", " corr_bools,\n", " pc_bools,\n", - " local_max_cells,\n", + " local_mem_cells,\n", " bias_voltage,\n", " gain_setting,\n", " local_acq_rate,\n", @@ 
-271,7 +278,7 @@ " mdata_dict[\"constants\"] = dict()\n", " mdata_dict[\"physical-detector-unit\"] = None # initialization\n", "\n", - " for const_name, (const_init_fun, const_shape, (cond_type, cond_param)) in const_dict.items():\n", + " for const_name, (const_init_fun, const_shape, (cond_type, cond_param)) in const_dict.items(): # noqa\n", " if gain_mode and const_name in (\"ThresholdsDark\",):\n", " continue\n", " \n", @@ -280,11 +287,14 @@ " mdata_dict[\"constants\"][const_name] = const_mdata\n", "\n", " if slopes_ff_from_files and const_name in [\"SlopesFF\", \"BadPixelsFF\"]:\n", - " const_mdata[\"file-path\"] = f\"{slopes_ff_from_files}/slopesff_bpmask_module_{qm}.h5\"\n", + " const_mdata[\"file-path\"] = (\n", + " f\"{slopes_ff_from_files}/slopesff_bpmask_module_{tools.module_index_to_qm(module_index)}.h5\") # noqa\n", " const_mdata[\"creation-time\"] = \"00:00:00\"\n", " continue\n", " \n", - " if gain_mode and const_name in (\"BadPixelsPC\", \"SlopesPC\", \"BadPixelsFF\", \"SlopesFF\"):\n", + " if gain_mode and const_name in (\n", + " \"BadPixelsPC\", \"SlopesPC\", \"BadPixelsFF\", \"SlopesFF\"\n", + " ):\n", " param_copy = cond_param.copy()\n", " del param_copy[\"gain_mode\"]\n", " condition = getattr(Conditions, cond_type).AGIPD(**param_copy)\n", @@ -314,7 +324,7 @@ " const_mdata[\"file-path\"] = const_dict[const_name][:2]\n", " const_mdata[\"creation-time\"] = None\n", "\n", - " return qm, mdata_dict, karabo_da, local_acq_rate, local_max_cells" + " return mdata_dict, karabo_da, local_acq_rate, local_mem_cells" ] }, { @@ -333,18 +343,12 @@ "metadata": {}, "outputs": [], "source": [ - "# set everything up filewise\n", - "mapped_files, _, _, _, _ = tools.map_modules_from_folder(\n", - " str(in_folder), run, path_template, karabo_da, sequences\n", - ")\n", - "\n", "pc_bools = [corr_bools.get(\"rel_gain\"),\n", " corr_bools.get(\"adjust_mg_baseline\"),\n", " corr_bools.get('blc_noise'),\n", " corr_bools.get('blc_hmatch'),\n", " 
corr_bools.get('blc_stripes'),\n", " melt_snow]\n", - "\n", "inp = []\n", "only_dark = False\n", "nodb_with_dark = False\n", @@ -355,20 +359,13 @@ "\n", "da_to_qm = dict()\n", "for module_index, k_da in zip(modules, karabo_da):\n", - " qm = tools.module_index_to_qm(module_index)\n", - " da_to_qm[k_da] = qm\n", - " \n", + " da_to_qm[k_da] = tools.module_index_to_qm(module_index)\n", " if k_da in retrieved_constants:\n", - " print(f\"Constant for {k_da} already in calibration_metadata.yml, won't query again.\")\n", - " continue\n", - " \n", - " if qm in mapped_files and not mapped_files[qm].empty():\n", - " # TODO: make map_modules_from_folder just return list(s)\n", - " qm_files = [Path(mapped_files[qm].get()) for _ in range(mapped_files[qm].qsize())]\n", - " else:\n", + " print(\n", + " f\"Constant for {k_da} already in calibration_metadata.yml, won't query again.\")\n", " continue\n", "\n", - " inp.append((qm_files, qm, k_da, module_index))" + " inp.append((k_da, module_index))" ] }, { @@ -387,8 +384,20 @@ "metadata": {}, "outputs": [], "source": [ - "for qm, md_dict, karabo_da, acq_rate, max_cells in results:\n", + "acq_rate_mods = []\n", + "mem_cells_mods = []\n", + "for md_dict, karabo_da, acq_rate, mem_cells in results:\n", " retrieved_constants[karabo_da] = md_dict\n", + " mem_cells_mods.append(mem_cells)\n", + " acq_rate_mods.append(acq_rate)\n", + "\n", + "# Validate that mem_cells and acq_rate are the same for all modules.\n", + "# TODO: Should a warning be enough?\n", + "if len(set(mem_cells_mods)) != 1 or len(set(acq_rate_mods)) != 1:\n", + " print(\n", + " \"WARNING: Number of memory cells or \"\n", + " \"acquisition rate are not identical for all modules.\\n\"\n", + " f\"mem_cells: {mem_cells_mods}.\\nacq_rate: {acq_rate_mods}.\")\n", "\n", "# check if it is requested not to retrieve any constants from the database\n", "if nodb_with_dark:\n", @@ -399,7 +408,7 @@ " ', '.join([tools.module_index_to_qm(x) for x in modules]))\n", " print(f\"Operating 
conditions are:\")\n", " print(f\"• Bias voltage: {bias_voltage}\")\n", - " print(f\"• Memory cells: {max_cells}\")\n", + " print(f\"• Memory cells: {mem_cells}\")\n", " print(f\"• Acquisition rate: {acq_rate}\")\n", " print(f\"• Gain mode: {gain_mode.name}\")\n", " print(f\"• Gain setting: {gain_setting}\")\n", @@ -460,5 +469,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb index 8043f4a2199709fc832cf03a1ea898da292bc501..be96463e0e8c90733e0e8532d1df0a732a714cc1 100644 --- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb +++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb @@ -6,7 +6,7 @@ "source": [ "# AGIPD Characterize Dark Images #\n", "\n", - "Author: S. Hauf, Version: 0.1\n", + "Author: European XFEL Detector Group, Version: 2.0\n", "\n", "The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets , noise, bad-pixel maps and thresholding. All four types of constants are evaluated per-pixel and per-memory cell. 
Data for the detector's three gain stages needs to be present, separated into separate runs.\n", "\n", @@ -21,7 +21,6 @@ "source": [ "in_folder = \"/gpfs/exfel/d/raw/CALLAB/202031/p900113\" # path to input data, required\n", "out_folder = \"\" # path to output to, required\n", - "sequences = [-1] # sequence files to evaluate.\n", "modules = [-1] # list of modules to evaluate, RANGE ALLOWED\n", "run_high = 9985 # run number in which high gain data was recorded, required\n", "run_med = 9984 # run number in which medium gain data was recorded, required\n", @@ -30,13 +29,10 @@ "\n", "karabo_id = \"HED_DET_AGIPD500K2G\" # karabo karabo_id\n", "karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators\n", - "receiver_id = \"{}CH0\" # inset for receiver devices\n", - "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n", - "h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n", - "h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n", - "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information\n", + "receiver_template = \"{}CH0\" # inset for receiver devices\n", + "instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images\n", + "ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information\n", "karabo_id_control = \"HED_EXP_AGIPD500K2G\" # karabo-id for control device '\n", - "karabo_da_control = \"AGIPD500K2G00\" # karabo DA for control infromation\n", "\n", "use_dir_creation_date = True # use dir creation date as data production reference date\n", "cal_db_interface = \"tcp://max-exfl016:8020\" # the database interface to use\n", @@ -45,12 +41,12 @@ "db_output = False # output constants to database\n", "\n", "mem_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", - "bias_voltage = 0 # detector bias voltage\n", - "gain_setting = 0.1 # the gain setting, use 0.1 to try to 
auto-determine\n", + "bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.\n", + "gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.\n", + "gain_mode = -1 # gain mode, use -1 to use value stored in slow data.\n", "integration_time = -1 # integration time, negative values for auto-detection.\n", "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n", "interlaced = False # assume interlaced data format, for data prior to Dec. 2017\n", - "rawversion = 2 # RAW file format version\n", "\n", "thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels\n", "thresholds_offset_hard = [0, 0] # For setting the same threshold offset for the 3 gains. Left for backcompatability. Default [0, 0] to take the following parameters.\n", @@ -67,8 +63,9 @@ "thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels\n", "thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels\n", "\n", - "thresholds_gain_sigma = 5. # Gain separation sigma threshold\n", - "\n", + "thresholds_gain_sigma = 5. # Gain separation sigma threshold\n", + "max_trains = 0 # Maximum number of trains to use for processing dark. Set to 0 to process all available trains.\n", + "min_trains = 1 # Minimum number of trains for processing dark. If raw folder has less than minimum trains processing is stopped.\n", "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. 
~7mins extra time for 64 memory cells\n", "\n", "# This is used if modules is not specified:\n", @@ -96,28 +93,24 @@ "import os\n", "from collections import OrderedDict\n", "from datetime import timedelta\n", - "from typing import Tuple\n", + "from pathlib import Path\n", + "from typing import List, Tuple\n", "\n", "import dateutil.parser\n", - "import h5py\n", "import matplotlib\n", "import numpy as np\n", "import pasha as psh\n", + "import psutil\n", "import tabulate\n", "import yaml\n", + "from IPython.display import Latex, Markdown, display\n", + "from extra_data import RunDirectory\n", "\n", "matplotlib.use('agg')\n", "\n", "import iCalibrationDB\n", "import matplotlib.pyplot as plt\n", - "from cal_tools.agipdlib import (\n", - " get_acq_rate,\n", - " get_bias_voltage,\n", - " get_gain_mode,\n", - " get_gain_setting,\n", - " get_integration_time,\n", - " get_num_cells,\n", - ")\n", + "from cal_tools.agipdlib import AgipdCtrl\n", "from cal_tools.enums import AgipdGainMode, BadPixels\n", "from cal_tools.plotting import (\n", " create_constant_overview,\n", @@ -137,7 +130,6 @@ " save_const_to_h5,\n", " send_to_db,\n", ")\n", - "from IPython.display import Latex, Markdown, display\n", "\n", "%matplotlib inline" ] @@ -149,14 +141,19 @@ "outputs": [], "source": [ "# insert control device if format string (does nothing otherwise)\n", - "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n", - "\n", - "max_cells = mem_cells\n", - "\n", - "offset_runs = OrderedDict()\n", - "offset_runs[\"high\"] = run_high\n", - "offset_runs[\"med\"] = run_med\n", - "offset_runs[\"low\"] = run_low\n", + "ctrl_src = ctrl_source_template.format(karabo_id_control)\n", + "\n", + "runs_dict = OrderedDict()\n", + "\n", + "for gain_idx, (run_name, run_number) in enumerate(zip(\n", + " [\"high\", \"med\", \"low\"],\n", + " [run_high, run_med, run_low]\n", + ")):\n", + " runs_dict[run_name] = {\n", + " \"number\": run_number,\n", + " \"gain\": gain_idx,\n", + " \"dc\": 
RunDirectory(f'{in_folder}/r{run_number:04d}/')\n", + " }\n", "\n", "creation_time=None\n", "if use_dir_creation_date:\n", @@ -166,6 +163,10 @@ "\n", "run, prop, seq = run_prop_seq_from_path(in_folder)\n", "\n", + "# Read report path and create file location tuple to add with the injection\n", + "file_loc = f\"proposal:{prop} runs:{run_low} {run_med} {run_high}\"\n", + "\n", + "report = get_report(out_folder)\n", "cal_db_interface = get_random_db_interface(cal_db_interface)\n", "print(f'Calibration database interface: {cal_db_interface}')\n", "\n", @@ -181,31 +182,18 @@ " dinstance = \"AGIPD500K\"\n", " nmods = 8\n", "\n", - "if sequences == [-1]:\n", - " sequences = None\n", - "control_names = [f'{in_folder}/r{r:04d}/RAW-R{r:04d}-{karabo_da_control}-S00000.h5'\n", - " for r in (run_high, run_med, run_low)]\n", + "instrument_src = instrument_source_template.format(karabo_id, receiver_template)\n", + "run_numbers = [run_high, run_med, run_low]\n", "\n", - "if operation_mode not in (\"ADAPTIVE_GAIN\", \"FIXED_GAIN\"):\n", - " print(f\"WARNING: unknown operation_mode \\\"{operation_mode}\\\" parameter set\")\n", - "run_gain_modes = [get_gain_mode(fn, h5path_ctrl) for fn in control_names]\n", - "if all(gm == AgipdGainMode.ADAPTIVE_GAIN for gm in run_gain_modes):\n", - " fixed_gain_mode = False\n", - " if operation_mode == \"FIXED_GAIN\":\n", - " print(\"WARNING: operation_mode parameter is FIXED_GAIN, slow data indicates adaptive gain\")\n", - "elif run_gain_modes == [AgipdGainMode.FIXED_HIGH_GAIN, AgipdGainMode.FIXED_MEDIUM_GAIN, AgipdGainMode.FIXED_LOW_GAIN]:\n", - " if operation_mode == \"ADAPTIVE_GAIN\":\n", - " print(\"WARNING: operation_mode parameter ix ADAPTIVE_GAIN, slow data indicates fixed gain\")\n", - " fixed_gain_mode = True\n", - "else:\n", - " print(f'Something is clearly wrong; slow data indicates gain modes {run_gain_modes}')\n", + "def create_karabo_da_list(modules):\n", + " return([\"AGIPD{:02d}\".format(i) for i in modules])\n", "\n", - "if 
integration_time < 0:\n", - " integration_times = [get_integration_time(fn, h5path_ctrl) for fn in control_names]\n", - " if len(set(integration_times)) > 1:\n", - " print(f'WARNING: integration time is not constant across the specified dark runs')\n", - "\n", - "integration_time = integration_times[0]\n", + "if karabo_da[0] == '-1':\n", + " if modules[0] == -1:\n", + " modules = list(range(nmods))\n", + " karabo_da = create_karabo_da_list(modules)\n", + "else:\n", + " modules = [int(x[-2:]) for x in karabo_da]\n", "\n", "print(f\"Detector in use is {karabo_id}\")\n", "print(f\"Instrument {instrument}\")\n", @@ -218,30 +206,85 @@ "metadata": {}, "outputs": [], "source": [ - "runs = [run_high, run_med, run_low]\n", - "\n", - "if gain_setting == 0.1:\n", - " if creation_time.replace(tzinfo=None) < dateutil.parser.parse('2020-01-31'):\n", - " print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n", - " gain_setting = None\n", + "# Create out_folder if it doesn't exist.\n", + "Path(out_folder).mkdir(parents=True, exist_ok=True)\n", + "\n", + "n_files = 0\n", + "total_file_sizes = 0\n", + "max_trains_list = []\n", + "\n", + "for run_dict in runs_dict.values():\n", + " missing_modules = []\n", + " image_dc = run_dict[\"dc\"].select(f\"{karabo_id_control}*\", \"*\", require_all=True)\n", + " # This is important in case of no slurm parallelization over modules is done.\n", + " # (e.g. 
running notebook interactively)\n", + " sources_l = [(f\"{karabo_id_control}*\", \"*\")]\n", + " sources_l += [(instrument_src.format(m), \"*\") for m in modules]\n", + " image_dc = run_dict[\"dc\"].select(sources_l, require_all=True)\n", + " # validate that there are trains and that data sources are\n", + " # present for any of the selected modules.\n", + " if (\n", + " len(image_dc.train_ids) == 0 or\n", + " not np.any([\n", + " karabo_id in s for s in run_dict[\"dc\"].select(sources_l, require_all=True).all_sources]) # noqa\n", + " ):\n", + " raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n", + "\n", + " max_trains_list.append(len(image_dc.train_ids))\n", + "\n", + " # update run_dc with selected module sources\n", + " run_dict[\"dc\"] = image_dc\n", + "\n", + "# Update modules and karabo_da lists based on available modules to process.\n", + "modules = [m for m in modules if m not in missing_modules]\n", + "karabo_da = create_karabo_da_list(modules)\n", + "\n", + "# Modifying run data collections to display the actual total number and size of files.\n", + "for run_dict in runs_dict.values():\n", + " file_sizes = [os.path.getsize(f.filename) / 1e9 for f in run_dict[\"dc\"].deselect(f\"{karabo_id_control}*\").files]\n", + " total_file_sizes += sum(file_sizes)\n", + " n_files += len(file_sizes)\n", + "\n", + "print(f\"Will process data in a total of {n_files} files ({total_file_sizes:.02f} GB).\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read and validate the runs control data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_run_conditions(runs_dict: dict):\n", + " agipd_cond = AgipdCtrl(\n", + " run_dc=runs_dict[\"dc\"],\n", + " image_src=instrument_src_mod,\n", + " ctrl_src=ctrl_src,\n", + " )\n", + " cond_dict[\"runs\"].append(runs_dict[\"number\"])\n", + " if acq_rate == 0:\n", + " cond_dict[\"acq_rate\"].append(agipd_cond.get_acq_rate())\n", + " if mem_cells == 0:\n", + " cond_dict[\"mem_cells\"].append(agipd_cond.get_num_cells())\n", + " if gain_setting == -1: \n", + " cond_dict[\"gain_setting\"].append(\n", + " agipd_cond.get_gain_setting(creation_time))\n", + " if bias_voltage == 0.:\n", + " cond_dict[\"bias_voltage\"].append(\n", + " agipd_cond.get_bias_voltage(karabo_id_control))\n", + " if integration_time == -1:\n", + " cond_dict[\"integration_time\"].append(\n", + " agipd_cond.get_integration_time())\n", + " if gain_mode == -1:\n", + " cond_dict[\"gain_mode\"].append(agipd_cond.get_gain_mode())\n", " else:\n", - " try:\n", - " # extract gain setting and validate that all runs have the same setting\n", - " gsettings = []\n", - " for r in runs:\n", - " control_fname = '{}/r{:04d}/RAW-R{:04d}-{}-S00000.h5'.format(in_folder, r, r,\n", - " karabo_da_control)\n", - " gsettings.append(get_gain_setting(control_fname, h5path_ctrl))\n", - " if not all(g == gsettings[0] for g in gsettings):\n", - " raise ValueError(f\"Different gain settings for the 3 input runs {gsettings}\")\n", - " gain_setting = gsettings[0]\n", - " except Exception as e:\n", - " print(f'Error while reading gain setting from: \\n{control_fname}')\n", - " print(f'Error: {e}')\n", - " if \"component not found\" in str(e):\n", - " print(\"Gain setting is not found in the control information\")\n", - " print(\"Data will not be processed\")\n", - " sequences = []" + " cond_dict[\"gain_mode\"].append(AgipdGainMode(gain_mode))" ] }, { @@ -250,26 +293,116 @@ "metadata": {}, "outputs": [], "source": 
[ - "if karabo_da[0] == '-1':\n", - " if modules[0] == -1:\n", - " modules = list(range(nmods))\n", - " karabo_da = [\"AGIPD{:02d}\".format(i) for i in modules]\n", - "else:\n", - " modules = [int(x[-2:]) for x in karabo_da]\n", - "h5path = h5path.format(karabo_id, receiver_id)\n", - "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n", - "\n", - "if bias_voltage == 0:\n", - " # Read the bias voltage from files, if recorded.\n", - " # If not available, make use of the historical voltage the detector is running at\n", - " bias_voltage = get_bias_voltage(control_names[0], karabo_id_control)\n", - " bias_voltage = bias_voltage if bias_voltage is not None else 300\n", + "def validate_gain_modes(gain_modes: List[AgipdGainMode]):\n", + " # Validate that gain modes are not a mix of adaptive and fixed gain.\n", + " if all(\n", + " gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes\n", + " ):\n", + " fixed_gain_mode = False\n", + " elif any(\n", + " gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes\n", + " ):\n", + " raise ValueError(\n", + " f\"ERROR: Given runs {self.read_conditions['run_number']}\"\n", + " \" have a mix of ADAPTIVE and FIXED gain modes: \"\n", + " f\"{self.read_conditions['gain_mode']}.\"\n", + " )\n", + " else:\n", + " fixed_gain_mode = True\n", + " return fixed_gain_mode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read slow data from 1st channel only.\n", + "# Read all modules in one notebook and validate the conditions across detectors?\n", + "# Currently slurm jobs run per one module.\n", + "\n", + "# TODO: what if first module is not available. 
Maybe only channel 2 available\n", + "instrument_src_mod = instrument_src.format(modules[0])\n", + "\n", + "cond_dict = dict()\n", + "fixed_gain_mode = None\n", + "\n", + "with multiprocessing.Manager() as manager:\n", + " cond_dict[\"runs\"] = manager.list()\n", + " cond_dict[\"acq_rate\"] = manager.list()\n", + " cond_dict[\"mem_cells\"] = manager.list()\n", + " cond_dict[\"gain_setting\"] = manager.list()\n", + " cond_dict[\"gain_mode\"] = manager.list()\n", + " cond_dict[\"bias_voltage\"] = manager.list()\n", + " cond_dict[\"integration_time\"] = manager.list()\n", + "\n", + " with multiprocessing.Pool(processes=len(modules)) as pool:\n", + " pool.starmap(read_run_conditions, zip(runs_dict.values()))\n", + "\n", + " for cond, vlist in cond_dict.items():\n", + " if cond == \"runs\":\n", + " continue\n", + " elif cond == \"gain_mode\":\n", + " fixed_gain_mode = validate_gain_modes(cond_dict[\"gain_mode\"])\n", + " if not all(x == vlist[0] for x in vlist):\n", + " # TODO: raise ERROR??\n", + " print(\n", + " f\"WARNING: {cond} is not the same for the runs \"\n", + " f\"{cond_dict['runs']} with values\"\n", + " f\" of {cond_dict[cond]}, respectively.\"\n", + " )\n", + " if cond_dict[\"acq_rate\"]: acq_rate = cond_dict[\"acq_rate\"][0]\n", + " if cond_dict[\"mem_cells\"]: mem_cells = cond_dict[\"mem_cells\"][0]\n", + " if cond_dict[\"gain_setting\"]: gain_setting = cond_dict[\"gain_setting\"][0]\n", + " if cond_dict[\"gain_mode\"]: gain_mode = list(cond_dict[\"gain_mode\"])\n", + " if cond_dict[\"bias_voltage\"]: bias_voltage = cond_dict[\"bias_voltage\"][0]\n", + " if cond_dict[\"integration_time\"]: integration_time = cond_dict[\"integration_time\"][0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Determine the gain operation mode based on the gain_mode stored in control h5file.\n", + "if operation_mode not in (\"ADAPTIVE_GAIN\", \"FIXED_GAIN\"):\n", + " print(f\"WARNING: unknown 
operation_mode \\\"{operation_mode}\\\" parameter set\")\n", "\n", + "if (\n", + " gain_mode == [\n", + " AgipdGainMode.FIXED_HIGH_GAIN,\n", + " AgipdGainMode.FIXED_MEDIUM_GAIN,\n", + " AgipdGainMode.FIXED_LOW_GAIN\n", + " ] and\n", + " operation_mode == \"ADAPTIVE_GAIN\"\n", + "):\n", + " print(\n", + " \"WARNING: operation_mode parameter is ADAPTIVE_GAIN, \"\n", + " \"slow data indicates FIXED_GAIN.\")\n", + "elif not fixed_gain_mode and operation_mode == \"FIXED_GAIN\":\n", + " print(\n", + " \"WARNING: operation_mode parameter is FIXED_GAIN, \"\n", + " \"slow data indicates ADAPTIVE_GAIN\")\n", + "elif not all(gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_mode):\n", + " raise ValueError(\n", + " \"ERROR: Wrong arrangment of given dark runs. \"\n", + " f\"Given runs' gain_modes are {gain_mode} for runs: {runs}.\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "print(\"Parameters are:\")\n", "print(f\"Proposal: {prop}\")\n", - "print(f\"Memory cells: {mem_cells}/{max_cells}\")\n", - "print(\"Runs: {}\".format([v for v in offset_runs.values()]))\n", - "print(f\"Sequences: {sequences if sequences else 'All'}\")\n", + "print(f\"Acquisition rate: {acq_rate}\")\n", + "print(f\"Memory cells: {mem_cells}\")\n", + "print(f\"Runs: {run_numbers}\")\n", "print(f\"Interlaced mode: {interlaced}\")\n", "print(f\"Using DB: {db_output}\")\n", "print(f\"Input: {in_folder}\")\n", @@ -315,56 +448,28 @@ " ]" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The following lines will create a queue of files which will the be executed module-parallel. Distiguishing between different gains." 
- ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# set everything up filewise\n", - "os.makedirs(out_folder, exist_ok=True)\n", - "gain_mapped_files, _, total_file_size = map_gain_stages(\n", - " in_folder, offset_runs, path_template, karabo_da, sequences\n", - ")\n", - "# TODO: Keep this commented out to use it later again, this is false information at the moment.\n", - "# print(f\"Will process a total of {total_files} files ({total_file_size:.02f} GB).\")\n", - "\n", - "# TODO: Remove all of this nonsense with Extra-data.\n", - "inp = []\n", - "for gain_index, (gain, qm_file_map) in enumerate(gain_mapped_files.items()):\n", - " selected_inp = None\n", - " for module_index in modules:\n", - " max_n_imgs = 0\n", - " qm = module_index_to_qm(module_index)\n", - " if qm not in qm_file_map:\n", - " print(f\"Did not find files for {qm}\")\n", - " continue\n", - " file_queue = qm_file_map[qm]\n", - " while not file_queue.empty():\n", - " filename = file_queue.get()\n", - " # TODO: remove after using EXtra-data to read files\n", - " # and skip empty trains.\n", - " with h5py.File(filename, \"r\") as fin:\n", - " n_imgs = fin[h5path.format(module_index)+\"/trainId\"].shape[0]\n", - " if n_imgs != 0 and n_imgs > max_n_imgs:\n", - " selected_inp = (filename, module_index, gain_index)\n", - " max_n_imgs = n_imgs\n", - " print(f\"Process {selected_inp[0]} for {qm}\")\n", - " inp.append(selected_inp)\n", - " if selected_inp is None:\n", - " raise ValueError(\n", - " \"No images to process for run: \"\n", - " f\"{[v for v in offset_runs.values()][gain_index]}\"\n", - " )\n", - "\n", - "total_files = len(inp)" + "# Check if max_trains can be processed.\n", + "\n", + "# more relevant if running on multiple modules (i.e. 
within notebook)\n", + "# mem_cells * gains * n_constants * modules * agipd_[x,y]image_size * 2\n", + "av_mem = psutil.virtual_memory().available\n", + "possible_trains = av_mem // (352 * 3 * 3 * len(modules) * 131072 * 2)\n", + "if max_trains == 0:\n", + " max_trains = max(max_trains_list)\n", + "if max_trains > possible_trains:\n", + " max_trains = possible_trains\n", + " print(\n", + " f\"WARNING: available memory for processing is { av_mem / 1e9:.02f} GB.\"\n", + " f\" Modifing max_trains to process to {max_trains}\")\n", + "\n", + "for run_dict in runs_dict.values():\n", + " run_dict[\"dc\"] = run_dict[\"dc\"].select_trains(np.s_[:max_trains])" ] }, { @@ -382,49 +487,31 @@ "metadata": {}, "outputs": [], "source": [ - "# min() only relevant if running on multiple modules (i.e. within notebook)\n", - "parallel_num_procs = min(12, total_files)\n", + "parallel_num_procs = min(12, len(modules)*3)\n", "parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs\n", "print(f\"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each\")\n", "\n", - "\n", "def characterize_module(\n", - " fast_data_filename: str, channel: int, gain_index: int\n", - ") -> Tuple[np.array, np.array, np.array, np.array, int, np.array, int, float]:\n", - " if max_cells == 0:\n", - " num_cells = get_num_cells(fast_data_filename, karabo_id, channel)\n", - " else:\n", - " num_cells = max_cells\n", + " channel: int, runs_dict: dict,\n", + ") -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:\n", "\n", - " if acq_rate == 0.:\n", - " slow_paths = control_names[gain_index], karabo_id_control\n", - " fast_paths = fast_data_filename, karabo_id, channel\n", - " local_acq_rate = get_acq_rate(fast_paths, slow_paths)\n", - " else:\n", - " local_acq_rate = acq_rate\n", + " # Select the corresponding module channel.\n", + " instrument_src_mod = instrument_src.format(channel)\n", "\n", - " local_thresholds_offset_hard = 
thresholds_offset_hard[gain_index]\n", - " local_thresholds_noise_hard = thresholds_noise_hard[gain_index]\n", + " run_dc = runs_dict[\"dc\"]\n", + " gain_index = runs_dict[\"gain\"]\n", "\n", - " h5path_f = h5path.format(channel)\n", - " h5path_idx_f = h5path_idx.format(channel)\n", + " if run_dc[instrument_src_mod, \"image.data\"].shape[0] < min_trains:\n", + " print(\n", + " f\"WARNING: {run_dc.files} have less than \"\n", + " \"minimum trains: {min_trains}.\")\n", "\n", - " with h5py.File(fast_data_filename, \"r\") as infile:\n", - " if rawversion == 2:\n", - " count = np.squeeze(infile[f\"{h5path_idx_f}/count\"])\n", - " first = np.squeeze(infile[f\"{h5path_idx_f}/first\"])\n", - " last_index = int(first[count != 0][-1]+count[count != 0][-1])\n", - " first_index = int(first[count != 0][0])\n", - " else:\n", - " status = np.squeeze(infile[f\"{h5path_idx_f}/status\"])\n", - " if np.count_nonzero(status != 0) == 0:\n", - " return\n", - " last = np.squeeze(infile[f\"{h5path_idx_f}/last\"])\n", - " first = np.squeeze(infile[f\"{h5path_idx_f}/first\"])\n", - " last_index = int(last[status != 0][-1]) + 1\n", - " first_index = int(first[status != 0][0])\n", - " im = np.array(infile[f\"{h5path_f}/data\"][first_index:last_index,...])\n", - " cell_ids = np.squeeze(infile[f\"{h5path_f}/cellId\"][first_index:last_index,...])\n", + " # Read module's image and cellId data.\n", + " im = run_dc[instrument_src_mod, \"image.data\"].ndarray()\n", + " cell_ids = np.squeeze(run_dc[instrument_src_mod, \"image.cellId\"].ndarray())\n", + "\n", + " local_thresholds_offset_hard = thresholds_offset_hard[gain_index]\n", + " local_thresholds_noise_hard = thresholds_noise_hard[gain_index] \n", "\n", " if interlaced:\n", " if not fixed_gain_mode:\n", @@ -435,13 +522,12 @@ " if not fixed_gain_mode:\n", " ga = im[:, 1, ...]\n", " im = im[:, 0, ...].astype(np.float32)\n", - "\n", " im = np.transpose(im)\n", " if not fixed_gain_mode:\n", " ga = np.transpose(ga)\n", "\n", " context = 
psh.context.ThreadContext(num_workers=parallel_num_threads)\n", - " offset = context.alloc(shape=(im.shape[0], im.shape[1], num_cells), dtype=np.float64)\n", + " offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)\n", " noise = context.alloc(like=offset)\n", "\n", " if fixed_gain_mode:\n", @@ -460,7 +546,6 @@ " ga_slice = ga[..., cell_slice_index]\n", " gains[..., cell_number] = np.median(ga_slice, axis=2)\n", " gains_std[..., cell_number] = np.std(ga_slice, axis=2)\n", - "\n", " context.map(process_cell, np.unique(cell_ids))\n", "\n", " # bad pixels\n", @@ -483,7 +568,7 @@ " bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD\n", " bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR\n", "\n", - " return offset, noise, gains, gains_std, bp, num_cells, local_acq_rate" + " return channel, gain_index, offset, noise, gains, gains_std, bp" ] }, { @@ -493,7 +578,18 @@ "outputs": [], "source": [ "with multiprocessing.Pool(processes=parallel_num_procs) as pool:\n", - " results = pool.starmap(characterize_module, inp)" + " results = pool.starmap(\n", + " characterize_module, itertools.product(modules, list(runs_dict.values())))\n", + "\n", + "# mapped values for processing 2 modules example:\n", + "# [\n", + "# 0, {\"gain\": 0, \"run_number\": <run-high>, \"dc\": <high-dc>},\n", + "# 0, {\"gain\": 1, \"run_number\": <run-med>, \"dc\": <med-dc>},\n", + "# 0, {\"gain\": 2, \"run_number\": <run-low>, \"dc\": <low-dc>},\n", + "# 1, {\"gain\": 0, \"run_number\": <run-high>, \"dc\": <high-dc>},\n", + "# 1, {\"gain\": 1, \"run_number\": <run-med>, \"dc\": <med-dc>},\n", + "# 1, {\"gain\": 2, \"run_number\": <run-low>, \"dc\": <low-dc>},\n", + "# ]" ] }, { @@ -509,13 +605,8 @@ " gain_g = OrderedDict()\n", " gainstd_g = OrderedDict()\n", "\n", - "all_cells = []\n", - "all_acq_rate = []\n", "\n", - "for (_, module_index, gain_index), (offset, noise, gains, gains_std, 
bp,\n", - " thiscell, thisacq) in zip(inp, results):\n", - " all_cells.append(thiscell)\n", - " all_acq_rate.append(thisacq)\n", + "for module_index, gain_index, offset, noise, gains, gains_std, bp in results:\n", " qm = module_index_to_qm(module_index)\n", " if qm not in offset_g:\n", " offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))\n", @@ -530,13 +621,7 @@ " badpix_g[qm][..., gain_index] = bp\n", " if not fixed_gain_mode:\n", " gain_g[qm][..., gain_index] = gains\n", - " gainstd_g[qm][..., gain_index] = gains_std\n", - "\n", - "\n", - "max_cells = np.max(all_cells)\n", - "print(f\"Using {max_cells} memory cells\")\n", - "acq_rate = np.max(all_acq_rate)\n", - "print(f\"Using {acq_rate} MHz acquisition rate\")" + " gainstd_g[qm][..., gain_index] = gains_std" ] }, { @@ -598,19 +683,6 @@ " res[qm]['ThresholdsDark'] = thresholds_g[qm]" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Read report path and create file location tuple to add with the injection\n", - "proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]\n", - "file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)\n", - "\n", - "report = get_report(out_folder)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -620,7 +692,7 @@ "# set the operating condition\n", "# note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None\n", "condition = iCalibrationDB.Conditions.Dark.AGIPD(\n", - " memory_cells=max_cells,\n", + " memory_cells=mem_cells,\n", " bias_voltage=bias_voltage,\n", " acquisition_rate=acq_rate,\n", " gain_setting=gain_setting,\n", @@ -643,7 +715,7 @@ " constant=iCalibrationDB.CalibrationConstant(),\n", " condition=condition,\n", " cal_db_interface=cal_db_interface,\n", - " snapshot_at=creation_time.isoformat(),\n", + " snapshot_at=creation_time.isoformat() if creation_time else None,\n", " timeout=cal_db_timeout\n", 
")\n", "for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):\n", @@ -654,6 +726,13 @@ " }" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sending calibration constants to the database." + ] + }, { "cell_type": "code", "execution_count": null, @@ -678,11 +757,18 @@ " file_loc, report, creation_time, out_folder)\n", " print(f\"Calibration constant {const} for {qm} is stored locally in {file_loc}.\\n\")\n", "\n", - " print(\"Constants parameter conditions are:\\n\")\n", - " print(f\"• memory_cells: {max_cells}\\n• bias_voltage: {bias_voltage}\\n\"\n", - " f\"• acquisition_rate: {acq_rate}\\n• gain_setting: {gain_setting}\\n\"\n", - " f\"• gain_mode: {fixed_gain_mode}\\n• integration_time: {integration_time}\\n\"\n", - " f\"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\\n\")" + "print(\"Constants parameter conditions are:\\n\")\n", + "print(f\"• memory_cells: {mem_cells}\\n• bias_voltage: {bias_voltage}\\n\"\n", + " f\"• acquisition_rate: {acq_rate}\\n• gain_setting: {gain_setting}\\n\"\n", + " f\"• gain_mode: {fixed_gain_mode}\\n• integration_time: {integration_time}\\n\"\n", + " f\"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Retrieving previous calibration constants for comparison." 
] }, { @@ -698,11 +784,6 @@ "def retrieve_old_constant(qm, const):\n", " dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()\n", "\n", - " # This should be used in case of running notebook\n", - " # by a different method other than myMDC which already\n", - " # sends CalCat info.\n", - " # TODO: Set db_module to \"\" by default in the first cell\n", - "\n", " data, mdata = get_from_db(\n", " karabo_id=karabo_id,\n", " karabo_da=qm_dict[qm][\"karabo_da\"],\n", @@ -710,7 +791,7 @@ " condition=condition,\n", " empty_constant=None,\n", " cal_db_interface=cal_db_interface,\n", - " creation_time=creation_time-timedelta(seconds=1),\n", + " creation_time=creation_time-timedelta(seconds=1) if creation_time else None,\n", " strategy=\"pdu_prior_in_time\",\n", " verbosity=1,\n", " timeout=cal_db_timeout\n", @@ -776,7 +857,7 @@ "source": [ "cell = 3\n", "gain = 0\n", - "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*offset_runs.values()))" + "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*run_numbers))" ] }, { @@ -794,7 +875,7 @@ "source": [ "cell = 3\n", "gain = 1\n", - "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*offset_runs.values()))" + "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*run_numbers))" ] }, { @@ -812,7 +893,7 @@ "source": [ "cell = 3\n", "gain = 2\n", - "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*offset_runs.values()))" + "show_overview(res, cell, gain, infix=\"{}-{}-{}\".format(*run_numbers))" ] }, { @@ -856,6 +937,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "\n", "## Aggregate values, and per Cell behaviour ##\n", "\n", "The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior." 
@@ -867,7 +949,7 @@ "metadata": {}, "outputs": [], "source": [ - "create_constant_overview(offset_g, \"Offset (ADU)\", max_cells, 4000, 8000,\n", + "create_constant_overview(offset_g, \"Offset (ADU)\", mem_cells, 4000, 8000,\n", " badpixels=[badpix_g, np.nan])" ] }, @@ -877,7 +959,7 @@ "metadata": {}, "outputs": [], "source": [ - "create_constant_overview(noise_g, \"Noise (ADU)\", max_cells, 0, 100,\n", + "create_constant_overview(noise_g, \"Noise (ADU)\", mem_cells, 0, 100,\n", " badpixels=[badpix_g, np.nan])" ] }, @@ -895,7 +977,7 @@ " bp_thresh[mod][...,:2] = con[...,:2]\n", " bp_thresh[mod][...,2:] = con\n", "\n", - " create_constant_overview(thresholds_g, \"Threshold (ADU)\", max_cells, 4000, 10000, 5,\n", + " create_constant_overview(thresholds_g, \"Threshold (ADU)\", mem_cells, 4000, 10000, 5,\n", " badpixels=[bp_thresh, np.nan],\n", " gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],\n", " marker=['d','d','','','']\n", @@ -911,7 +993,7 @@ "bad_pixel_aggregate_g = OrderedDict()\n", "for m, d in badpix_g.items():\n", " bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)\n", - "create_constant_overview(bad_pixel_aggregate_g, \"Bad pixel fraction\", max_cells, 0, 0.10, 3)" + "create_constant_overview(bad_pixel_aggregate_g, \"Bad pixel fraction\", mem_cells, 0, 0.10, 3)" ] }, { @@ -1120,7 +1202,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.11" + "version": "3.8.12" } }, "nbformat": 4, diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb index 021e16ec615b20c9ecf79b9570d87bcd0387f2c0..8854d0f5c6d0ca5791829d12ecd84c775bbbd77a 100644 --- a/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb +++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_FlatFields_Summary.ipynb @@ -22,13 +22,8 @@ "run = 449 # runs of image data used to create histograms\n", "\n", "karabo_id = 
\"MID_DET_AGIPD1M-1\" # karabo karabo_id\n", - "receiver_id = \"{}CH0\" # inset for receiver devices\n", - "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n", - "h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", - "h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", - "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information\n", - "karabo_id_control = \"MID_IRU_AGIPD1M1\" # karabo-id for control device\n", - "karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation\n", + "ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information\n", + "karabo_id_control = \"MID_EXP_AGIPD1M1\" # karabo-id for control device\n", "\n", "use_dir_creation_date = True # use the creation data of the input dir for database queries\n", "cal_db_interface = \"tcp://max-exfl016:8015#8045\" # the database interface to use\n", @@ -56,9 +51,9 @@ "\n", "# Detector conditions\n", "max_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", - "bias_voltage = 300 # Bias voltage\n", + "bias_voltage = 0. # Bias voltage\n", "acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine\n", - "gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine\n", + "gain_setting = -1 # the gain setting, use 0.1 to try to auto-determine\n", "photon_energy = 8.05 # photon energy in keV\n", "integration_time = -1 # integration time, negative values for auto-detection." 
] @@ -80,13 +75,7 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import tabulate\n", - "from cal_tools.agipdlib import (\n", - " get_acq_rate,\n", - " get_bias_voltage,\n", - " get_gain_setting,\n", - " get_integration_time,\n", - " get_num_cells,\n", - ")\n", + "from cal_tools.agipdlib import AgipdCtrl\n", "from cal_tools.agipdutils_ff import (\n", " BadPixelsFF,\n", " any_in,\n", @@ -104,7 +93,7 @@ " send_to_db\n", ")\n", "from dateutil import parser\n", - "from extra_data import RunDirectory, stack_detector_data\n", + "from extra_data import H5File, RunDirectory, stack_detector_data\n", "from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry\n", "from iCalibrationDB import Conditions, Constants, Detectors\n", "from iminuit import Minuit\n", @@ -132,45 +121,38 @@ "outputs": [], "source": [ "# Get operation conditions\n", - "filename = glob.glob(f\"{raw_folder}/r{run:04d}/*-AGIPD[0-1][0-9]-*\")[0]\n", - "channel = int(re.findall(r\".*-AGIPD([0-9]+)-.*\", filename)[0])\n", - "control_fname = f'{raw_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n", - "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n", - "\n", - "# Evaluate number of memory cells\n", - "mem_cells = get_num_cells(filename, karabo_id, channel)\n", - "if mem_cells is None:\n", - " raise ValueError(f\"No raw images found in {filename}\")\n", + "ctrl_source = ctrl_source_template.format(karabo_id_control)\n", "\n", - "# Evaluate aquisition rate\n", - "fast_paths = (filename, karabo_id, channel)\n", - "slow_paths = (control_fname, karabo_id_control)\n", - "\n", - "if acq_rate == 0.:\n", - " acq_rate = get_acq_rate(fast_paths,slow_paths)\n", + "raw_dc = RunDirectory(f'{raw_folder}/r{run:04d}/')\n", "\n", + "# Read operating conditions from AGIPD00 files\n", + "instrument_src_mod = [\n", + " s for s in list(raw_dc.all_sources) if \"0CH\" in s][0]\n", + "ctrl_src = [\n", + " s for s in list(raw_dc.all_sources) if ctrl_source in s][0]\n", "# Evaluate creation 
time\n", "creation_time = None\n", "if use_dir_creation_date:\n", " creation_time = get_dir_creation_date(raw_folder, run)\n", - " \n", - "# Evaluate gain setting\n", - "if gain_setting == 0.1:\n", - " if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):\n", - " print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n", - " gain_setting = None\n", - " else:\n", - " try:\n", - " gain_setting = get_gain_setting(control_fname, h5path_ctrl)\n", - " except Exception as e:\n", - " print(f'Error while reading gain setting from: \\n{control_fname}')\n", - " print(e)\n", - " print(\"Set gain settion to 0\")\n", - " gain_setting = 0\n", - "\n", - "# Evaluate integration time\n", - "if integration_time < 0:\n", - " integration_time = get_integration_time(control_fname, h5path_ctrl)\n", + "\n", + "agipd_cond = AgipdCtrl(\n", + " run_dc=raw_dc,\n", + " image_src=instrument_src_mod,\n", + " ctrl_src=ctrl_src,\n", + " raise_error=False, # to be able to process very old data without gain setting value\n", + ")\n", + "\n", + "mem_cells = agipd_cond.get_num_cells()\n", + "if mem_cells is None:\n", + " raise ValueError(f\"No raw images found in {raw_dc[instrument_src_mod].files}\")\n", + "if acq_rate == 0.:\n", + " acq_rate = agipd_cond.get_acq_rate()\n", + "if gain_setting == -1:\n", + " gain_setting = agipd_cond.get_gain_setting(creation_time)\n", + "if bias_voltage == 0.:\n", + " bias_voltage = agipd_cond.get_bias_voltage(karabo_id_control)\n", + "if integration_time == -1:\n", + " integration_time = agipd_cond.get_integration_time()\n", "\n", "# Evaluate detector instance for mapping\n", "instrument = karabo_id.split(\"_\")[0]\n", diff --git a/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb b/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb index 6fbfb3d357a1bbf6fc04212144c1ee7b2227efcb..e204cf3f605f20c7342638395402119b7f8e7864 100644 --- a/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb +++
b/notebooks/generic/overallmodules_Darks_Summary_NBC.ipynb @@ -6,8 +6,9 @@ "metadata": {}, "outputs": [], "source": [ - "#Author: K. Ahmed, M. Karnevsky, Version: 0.1\n", - "#The following is a summary for the processing of dark images and calibration constants production.\n", + "# Author: European XFEL Detector Group, Version: 1.0\n", + "\n", + "# Summary of processed dark calibration constants and a comparison with previously injected constants.\n", "\n", "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/fixed_gain/SPB_summary_fix2\" # path to output to, required\n", "karabo_id = \"SPB_DET_AGIPD1M-1\" # detector instance\n", @@ -25,7 +26,6 @@ "import os\n", "import warnings\n", "from collections import OrderedDict\n", - "from datetime import datetime\n", "from pathlib import Path\n", "\n", "warnings.filterwarnings('ignore')\n", @@ -40,7 +40,6 @@ "\n", "matplotlib.use(\"agg\")\n", "import matplotlib.gridspec as gridspec\n", - "import matplotlib.patches as patches\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline\n", @@ -49,8 +48,6 @@ "from cal_tools.ana_tools import get_range\n", "from cal_tools.plotting import show_processed_modules\n", "from cal_tools.tools import CalibrationMetadata, module_index_to_qm\n", - "from iCalibrationDB import Detectors\n", - "from XFELDetAna.plotting.heatmap import heatmapPlot\n", "from XFELDetAna.plotting.simpleplot import simplePlot" ] }, diff --git a/setup.py b/setup.py index 60090e3420b1fdcbd78e5bc250c44452ab17bab2..e73c52388c666bcf62a753bf39d580c60c70b61e 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ install_requires = [ "dill==0.3.0", "docutils==0.17.1", "dynaconf==3.1.4", - "extra_data==1.8.0", + "extra_data==1.9.1", "extra_geom==1.6.0", "gitpython==3.1.0", "h5py==3.5.0", @@ -78,6 +78,7 @@ install_requires = [ "pasha==0.1.0", "prettytable==0.7.2", "princess==0.5", + "psutil==5.9.0", "pypandoc==1.4", "python-dateutil==2.8.1", "pyyaml==5.3", diff --git a/src/cal_tools/agipdlib.py
b/src/cal_tools/agipdlib.py index ec6b4e25c7fe896a6e5b7fa2b99408ccb12ae627..a39ca0ea0ab304fad6d35c8f2be7b25bd0193317 100644 --- a/src/cal_tools/agipdlib.py +++ b/src/cal_tools/agipdlib.py @@ -1,15 +1,14 @@ import os import posixpath -import traceback import zlib from multiprocessing.pool import ThreadPool -from pathlib import Path from typing import Any, Dict, List, Optional, Tuple import h5py import numpy as np import sharedmem -from extra_data import DataCollection +from dateutil import parser +from extra_data import DataCollection, H5File, by_id, components from iCalibrationDB import Conditions, Constants from cal_tools import agipdalgs as calgs @@ -27,10 +26,35 @@ from cal_tools.enums import AgipdGainMode, BadPixels, SnowResolution from cal_tools.h5_copy_except import h5_copy_except_paths from cal_tools.tools import get_constant_from_db_and_time +class AgipdCtrl: + def __init__( + self, + run_dc: DataCollection, + image_src: str, + ctrl_src: str, + raise_error: bool = True, + ): + """ + Initialize AgipdCtrl class to read all required AGIPD parameters. + + :param image_src: H5 source for image data. + :param ctrl_src: H5 source for control (slow) data. + """ + self.run_dc = run_dc + self.image_src = image_src + self.ctrl_src = ctrl_src + + self.raise_error = raise_error + + def get_num_cells(self) -> Optional[int]: + """ + :return mem_cells: Number of memory cells. + return None, if no data available. + """ + cells = np.squeeze( + self.run_dc[ + self.image_src, "image.cellId"].drop_empty_trains().ndarray() + ) if cells.shape[0] == 0: return None maxcell = np.max(cells) @@ -38,91 +62,77 @@ def get_num_cells(fname, loc, module): dists = [abs(o - maxcell) for o in options] return options[np.argmin(dists)] + def get_acq_rate(self) -> Optional[float]: + """Get the acquisition rate from said detector module.
+ + If the data is available from the middlelayer FPGA_COMP device, + then it is retrieved from there. + If not, the rate is calculated from two different pulses time. -def get_acq_rate(fast_paths: Tuple[str, str, int], - slow_paths: Optional[Tuple[str, str]] = ('', '') - ) -> Optional[float]: - """Get the acquisition rate from said detector module. + The first entry is deliberately not used, as the detector just began + operating, and it might have skipped a train. + + :return acq_rate: the acquisition rate. + return None, if not available. + """ + # Attempt to look for acquisition rate in slow data + rep_rate_src = ( + self.ctrl_src, "bunchStructure.repetitionRate.value") + + if ( + rep_rate_src[0] in self.run_dc.all_sources and + rep_rate_src[1] in self.run_dc.keys_for_source(rep_rate_src[0]) + ): + # The acquisition rate value is stored in a 1D array of type + # float. + # It is desired to loose precision here because the usage is + # about bucketing the rate for managing meta-data. + + return round(float(self.run_dc[rep_rate_src].as_single_value()), 1) + + train_pulses = np.squeeze( + self.run_dc[ + self.image_src, "image.pulseId" + ].drop_empty_trains().train_from_index(0)[1] + ) - If the data is available from the middlelayer FPGA_COMP device, then it is - retrieved from there. If not, the rate is calculated from two different - pulses time. + # Compute acquisition rate from fast data + diff = train_pulses[1] - train_pulses[0] + options = {8: 0.5, 4: 1.1, 2: 2.2, 1: 4.5} + return options.get(diff, None) - The first entry is deliberatly not used, as the detector just began - operating, and it might have skipped a train. + def get_gain_setting( + self, + creation_time: "datetime.datetime", + ) -> Optional[int]: + """Retrieve Gain setting. - :param slow_paths: in which file and h5 path to look for slow data. 
- The first string is the filename with complete path, - the second string is the key `karabo_id_control` + If the data is available from the middlelayer FPGA_COMP device, + then it is retrieved from there. + If not, the setting is calculated off `setupr` and `patternTypeIndex` - :param fast_paths: in which module file and h5 path to look for pulses. - The first string is the filename with complete path, - the second string is the module device name `karabo_id`, - the third parameter is the module number, used to - navigate through the h5 file structure. + gain-setting 1: setupr@dark=8, setupr@slopespc=40 + gain-setting 0: setupr@dark=0, setupr@slopespc=32 - :return acq_rate: the acquisition rate. - If not found in either files, return None. - """ - # Attempt to look for acquisition rate in slow data - slow_data_file, karabo_id_control = slow_paths - slow_data_file = Path(slow_data_file) - if slow_data_file.is_file(): - slow_data_path = f'CONTROL/{karabo_id_control}/MDL/FPGA_COMP/bunchStructure/repetitionRate/value' # noqa - with h5py.File(slow_data_file, "r") as fin: - if slow_data_path in fin: - # The acquisition rate value is stored in a 1D array of type - # float. Use the 3rd value, arbitrarily chosen. - # It is desired to loose precision here because the usage is - # about bucketing the rate for managing meta-data. - return round(float(fin[slow_data_path][3]), 1) - - # Compute acquisition rate from fast data - fast_data_file, karabo_id, module = fast_paths - fast_data_file = Path(fast_data_file) - if fast_data_file.is_file(): - fast_data_path = f'INSTRUMENT/{karabo_id}/DET/{module}CH0:xtdf/image/pulseId' # noqa - with h5py.File(fast_data_file, "r") as fin: - if fast_data_path in fin: - # pulses is of shape (NNNN, 1), of type uint8. - # Squeeze out the data, and subtract the 3rd entry from the 2nd - # to get a rate. 
- pulses = np.squeeze(fin[fast_data_path][1:3]) - diff = pulses[1] - pulses[0] - options = {8: 0.5, 4: 1.1, 2: 2.2, 1: 4.5} - return options.get(diff, None) - - -def get_gain_setting(fname: str, h5path_ctrl: str) -> int: - """Retrieve Gain setting. - - If the data is available from the middlelayer FPGA_COMP device, then it is - retrieved from there. - If not, the setting is calculated off `setupr` and `patternTypeIndex` - - gain-setting 1: setupr@dark=8, setupr@slopespc=40 - gain-setting 0: setupr@dark=0, setupr@slopespc=32 - - patternTypeIndex 1: High-gain - patternTypeIndex 2: Medium-gain - patternTypeIndex 3: Low-gain - patternTypeIndex 4: SlopesPC - - :param fname: path to file with control information - :param h5path_ctrl: path to control information inside the file - :return: gain setting - """ - gain_path = f'{h5path_ctrl}/gain/value' - with h5py.File(fname, "r") as fin: - if gain_path in fin: - return fin[gain_path][0] + patternTypeIndex 1: High-gain + patternTypeIndex 2: Medium-gain + patternTypeIndex 3: Low-gain + patternTypeIndex 4: SlopesPC - # Get the index at which the train is not zero. - train_id = fin["INDEX/trainId"][()] - idx = np.nonzero(train_id)[0][0] + :return: gain setting. + return 0, if not available. 
+ """ + # TODO: remove after fixing get_possible_conditions + if creation_time and creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'): + print("Set gain-setting to None for runs taken before 2020-01-31") + return + + if "gain.value" in self.run_dc.keys_for_source(self.ctrl_src): + return self.run_dc[self.ctrl_src, "gain.value"].as_single_value() - setupr = fin[f'{h5path_ctrl}/setupr/value'][idx] - pattern_type_idx = fin[f'{h5path_ctrl}/patternTypeIndex/value'][idx] + setupr = self.run_dc[self.ctrl_src, "setupr.value"].as_single_value() + pattern_type_idx = self.run_dc[ + self.ctrl_src, "patternTypeIndex.value"].as_single_value() if (setupr == 0 and pattern_type_idx < 4) or ( setupr == 32 and pattern_type_idx == 4): @@ -131,60 +141,85 @@ def get_gain_setting(fname: str, h5path_ctrl: str) -> int: setupr == 40 and pattern_type_idx == 4): return 1 else: - raise ValueError('Could not derive gain setting from setupr and patternTypeIndex') # noqa + if self.raise_error: + raise ValueError( + "Could not derive gain setting from" + " setupr and patternTypeIndex" + ) + print( + "WARNING: gain_setting is not available " + f"at source {self.ctrl_src}.\nSet gain_setting to 0.") + # TODO: why return 0 and not None? 
+ return 0 -def get_gain_mode(fname: str, h5path_ctrl: str) -> AgipdGainMode: - """Returns the gain mode (adaptive or fixed) from slow data""" + def get_gain_mode(self) -> AgipdGainMode: + """Returns the gain mode (adaptive or fixed) from slow data""" - h5path_run = h5path_ctrl.replace("CONTROL/", "RUN/", 1) - h5path_gainmode = f'{h5path_run}/gainModeIndex/value' - with h5py.File(fname, "r") as fd: - if h5path_gainmode in fd: - return AgipdGainMode(fd[h5path_gainmode][0]) - return AgipdGainMode.ADAPTIVE_GAIN + if ( + self.ctrl_src in self.run_dc.all_sources and + "gainModeIndex.value" in self.run_dc.keys_for_source( + self.ctrl_src) + ): + return AgipdGainMode(int( + self.run_dc.get_run_value( + self.ctrl_src, "gainModeIndex.value"))) + return AgipdGainMode.ADAPTIVE_GAIN -def get_bias_voltage(fname: str, karabo_id_control: str, - module: Optional[int] = 0) -> int: - """Read the voltage information from the FPGA device of module 0. + def get_bias_voltage( + self, + karabo_id_control: str, + module: Optional[int] = 0 + ) -> int: + """Read the voltage information from the FPGA device of module 0. - Different modules may operate at different voltages. In practice, they all - operate at the same voltage. As such, it is okay to read a single module's - value. + Different modules may operate at different voltages. + In practice, they all operate at the same voltage. + As such, it is okay to read a single module's value. - This value is read from slow data. + If the FPGA module source is not available, 300 will be returned. + 300 is the default bias_voltage value before adding it to slow data. - If the file cannot be accessed, an OSError will be raised. - If the hdf5 path cannot be accessed, None will be returned. + :param karabo_id_control: The detector karabo id, for the control device. 
+ :param module: defaults to module 0 + :return: voltage, a uint16 + """ + voltage_src = ( + f"{karabo_id_control}/FPGA/M_{module}", + "highVoltage.actual.value") - :param fname: path to slow data file with control information - :param karabo_id: The detector Karabo id, for creating the hdf5 path - :param module: defaults to module 0 - :return: voltage, a uint16 - """ - voltage_path = f'/CONTROL/{karabo_id_control}/FPGA/M_{module}/highVoltage/actual/value' # noqa - with h5py.File(fname, "r") as fin: - if voltage_path in fin: - return fin[voltage_path][0] + if ( + voltage_src[0] in self.run_dc.all_sources and + voltage_src[1] in self.run_dc.keys_for_source(voltage_src[0]) + ): + return self.run_dc[voltage_src].as_single_value(atol=1, reduce_by='max') + else: + print( + "WARNING: Unable to read bias_voltage from" + f" {voltage_src[0]}/{voltage_src[1].replace('.','/')} " + "Returning 300 as default bias voltage value." + ) + return 300 -def get_integration_time(fname: str, h5path_ctrl: str) -> int: - """Read integration time from the FPGA device. + def get_integration_time(self) -> int: + """Read integration time from the FPGA device. - The integration time is specified as an integer number of clock - cycles each spanning ~9ns. The default (and legacy) value is 12. + The integration time is specified as an integer number of clock + cycles each spanning ~9ns. The default (and legacy) value is 12. 
- :param fname: path to file with control information - :param h5path_ctrl: path to control information inside the file - :return: integration time - """ - h5path_run = h5path_ctrl.replace('CONTROL/', 'RUN/', 1) - h5path_time = f'{h5path_run}/integrationTime/value' - with h5py.File(fname, 'r') as fd: - if h5path_time in fd: - return int(fd[h5path_time][0]) - return 12 + :return: integration time + """ + if ( + self.ctrl_src in self.run_dc.all_sources and + 'integrationTime.value' in self.run_dc.keys_for_source( + self.ctrl_src) + ): + return int(self.run_dc.get_run_value( + self.ctrl_src, 'integrationTime.value')) + + return 12 class CellSelection: @@ -228,8 +263,8 @@ class AgipdCorrections: self, max_cells: int, cell_sel: CellSelection, - h5_data_path: str = "INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", - h5_index_path: str = "INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", + h5_data_path: str = "SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", + h5_index_path: str = "SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", corr_bools: Optional[dict] = None, gain_mode: AgipdGainMode = AgipdGainMode.ADAPTIVE_GAIN, comp_threads: int = 1, @@ -361,70 +396,63 @@ class AgipdCorrections: """ module_idx = int(file_name.split('/')[-1].split('-')[2][-2:]) agipd_base = self.h5_data_path.format(module_idx) - idx_base = self.h5_index_path.format(module_idx) data_dict = self.shared_dict[i_proc] data_dict['moduleIdx'][0] = module_idx - try: - f = h5py.File(file_name, "r") - - (valid, first_index, last_index, - train_ids, valid_indices) = self.get_valid_image_idx(idx_base, f) - - if len(valid_indices) == 0: - # If there's not a single valid index, exit early. 
- data_dict['nImg'][0] = 0 - return 0 - - # store valid trains in shared memory - valid_train_ids = train_ids[valid] - n_valid_trains = len(valid_train_ids) - data_dict["n_valid_trains"][0] = n_valid_trains - data_dict["valid_trains"][:n_valid_trains] = valid_train_ids - - # get cell selection for the images in this file - cm = (self.cell_sel.CM_NONE if apply_sel_pulses - else self.cell_sel.CM_PRESEL) - img_selected = self.cell_sel.get_cells_on_trains( - valid_train_ids, cm=cm) - data_dict["cm_presel"][0] = (cm == self.cell_sel.CM_PRESEL) - - group = f[agipd_base]['image'] - allcells = np.squeeze(group['cellId']) - allpulses = np.squeeze(group['pulseId']) - - firange = self.gen_valid_range(first_index, last_index, - self.max_cells, allcells, - allpulses, valid_indices, - img_selected) - - if firange is None: - # gen_valid_range() returns None if there are no cells - # to correct, exit early. - data_dict['nImg'][0] = 0 - return 0 - - n_img = firange.shape[0] - data_dict['nImg'][0] = n_img - if np.all(np.diff(firange) == 1): - # if firange consists of contiguous indices - # convert firange from fancy indexing to slicing - firange = slice(firange[0], firange[-1]+1) - raw_data = group['data'][firange] - else: - # Avoid very slow performance using fancing indexing, - # if firange consists of non-contiguous indices. - raw_data = group['data'][:][firange] - data_dict['data'][:n_img] = raw_data[:, 0] - data_dict['rawgain'][:n_img] = raw_data[:, 1] - data_dict['cellId'][:n_img] = allcells[firange] - data_dict['pulseId'][:n_img] = allpulses[firange] - data_dict['trainId'][:n_img] = np.squeeze(group['trainId'][:][firange]) # noqa - except Exception as e: - print(f'Error during reading data from file {file_name}: {e}') - print(f'Error traceback: {traceback.format_exc()}') - n_img = 0 + + h5_dc = H5File(file_name) + + # Exclude trains without data. 
+ im_dc = h5_dc.select(agipd_base, "image.*", require_all=True) + + valid_train_ids = self.get_valid_image_idx( + im_dc[agipd_base, "image.trainId"]) + + if len(valid_train_ids) == 0: + # If there's not a single valid train, exit early. + print(f"WARNING: No valid trains for {im_dc.files} to process.") data_dict['nImg'][0] = 0 + return 0 + + # store valid trains in shared memory + # valid_train_ids = train_ids[valid] + n_valid_trains = len(valid_train_ids) + data_dict["n_valid_trains"][0] = n_valid_trains + data_dict["valid_trains"][:n_valid_trains] = valid_train_ids + + # get cell selection for the images in this file + cm = ( self.cell_sel.CM_NONE if apply_sel_pulses + else self.cell_sel.CM_PRESEL ) + + img_selected = self.cell_sel.get_cells_on_trains( + valid_train_ids, cm=cm) + data_dict["cm_presel"][0] = (cm == self.cell_sel.CM_PRESEL) + + # Exclude non_valid trains from the selected data collection. + im_dc = im_dc.select_trains(by_id(valid_train_ids)) + + if "AGIPD500K" in agipd_base: + agipd_comp = components.AGIPD500K(im_dc) + else: + agipd_comp = components.AGIPD1M(im_dc) + kw = { + "unstack_pulses": False, + "pulses": np.nonzero(img_selected), + } + + # [n_modules, n_imgs, 2, x, y] + raw_data = agipd_comp.get_array("image.data", **kw)[0] + n_img = raw_data.shape[0] + + data_dict['nImg'][0] = n_img + data_dict['data'][:n_img] = raw_data[:, 0] + data_dict['rawgain'][:n_img] = raw_data[:, 1] + data_dict['cellId'][:n_img] = agipd_comp.get_array( + "image.cellId", **kw)[0] + data_dict['pulseId'][:n_img] = agipd_comp.get_array( + "image.pulseId", **kw)[0] + data_dict['trainId'][:n_img] = agipd_comp.get_array( + "image.trainId", **kw)[0] return n_img def write_file(self, i_proc, file_name, ofile_name): @@ -437,7 +465,7 @@ class AgipdCorrections: """ module_idx = int(file_name.split('/')[-1].split('-')[2][-2:]) - agipd_base = self.h5_data_path.format(module_idx) + agipd_base = f'INSTRUMENT/{self.h5_data_path}/'.format(module_idx) idx_base = 
self.h5_index_path.format(module_idx) data_path = f'{agipd_base}/image' data_dict = self.shared_dict[i_proc] @@ -821,78 +849,32 @@ class AgipdCorrections: # Copy the data across into the existing shared-memory array mask[...] = msk[...] - def get_valid_image_idx( - self, idx_base: str, infile: str, raw_format_version: int = 2 - ): - """Return the indices of valid data""" - if raw_format_version == 2: - idxtrains = np.squeeze(infile['/INDEX/trainId']) - - # Check against train ID filter list, if any - if self.train_ids is not None: - valid = np.in1d(idxtrains, self.train_ids) + def get_valid_image_idx(self, im_dc: DataCollection) -> list: # noqa + """Return a list of valid train ids. - if not valid.any(): - # Shortcut to avoid any further loading. - return valid, 0, 0, idxtrains, np.zeros(0, dtype=np.int32) - else: - valid = np.ones_like(idxtrains, dtype=bool) - - # Load count and offsets and filter for non-emtpy trains. - count = np.squeeze(infile[idx_base + "image/count"]) - first = np.squeeze(infile[idx_base + "image/first"]) - valid &= count != 0 - - # Validate that train indices values fall - # between medianTrain +- 1e4 - medianTrain = np.median(idxtrains) - lowok = (idxtrains > medianTrain - 1e4) - highok = (idxtrains < medianTrain + 1e4) - valid &= lowok & highok - - if not valid.any(): - # Shortcut if no valid trains are left. - return valid, 0, 0, idxtrains, np.zeros(0, dtype=np.int32) - - # Last index = last valid train + max. number of memory cells - last_index = int(first[valid][-1] + count[valid][-1]) - first_index = int(first[valid][0]) - # do actual validity filtering: - validc, validf = count[valid], first[valid] - - # Creating an array of validated indices. 
- # If all indices were validated this array will be the same, - # as what is stored at /DET/image/trainId - valid_indices = np.concatenate( - [ - np.arange(validf[i], validf[i] + validc[i]) - for i in range(validf.size) - ], - axis=0, - ) - valid_indices = np.squeeze(valid_indices).astype(np.int32) - - elif raw_format_version == 1: - status = np.squeeze(infile[idx_base + "image/status"]) - if np.count_nonzero(status != 0) == 0: - raise IOError(f"File {infile} has no valid counts") - last = np.squeeze(infile[idx_base + "image/last"]) - first = np.squeeze(infile[idx_base + "image/first"]) - valid = status != 0 - last_index = int(last[status != 0][-1]) + 1 - first_index = int(first[status != 0][0]) - - idxtrains = np.squeeze(infile["/INDEX/trainId"]) - medianTrain = np.nanmedian(idxtrains) - lowok = (idxtrains > medianTrain - 1e4) - highok = (idxtrains < medianTrain + 1e4) - valid &= lowok & highok - valid_indices = None + Exclude non-valid train ids from past or future. + """ + dc_trains = im_dc.train_ids + if len(dc_trains) == 0: + return 0 + # Check against train ID filter list, if any + if self.train_ids is not None: + valid = np.in1d(dc_trains, self.train_ids) else: - raise AttributeError( - f"Not a known raw format version: {raw_format_version}") + valid = np.ones_like(dc_trains, dtype=bool) + + # Train indices are of type=f32 + # Validate that train indices values fall + # between medianTrain +- 1e4 + medianTrain = np.nanmedian(dc_trains) + lowok = (dc_trains > medianTrain - 1e4) + highok = (dc_trains < medianTrain + 1e4) + valid &= lowok & highok + + # exclude non valid trains + valid_trains = valid * dc_trains - return (valid, first_index, last_index, idxtrains, valid_indices) + return valid_trains[valid_trains!=0] def apply_selected_pulses(self, i_proc: int) -> int: """Select sharedmem data indices to correct based on selected @@ -939,53 +921,6 @@ class AgipdCorrections: return n_img - def gen_valid_range(self, first_index: int, last_index: int, - 
max_cells: int, allcells: np.array, - allpulses: np.array, - valid_indices: Optional[np.array] = None, - img_selected: Optional[np.array] = None, - ) -> np.array: - """ Validate the arrays of image.cellId and image.pulseId - to check presence of data and to avoid empty trains. - - selected pulses range given from the AGIPD correction notebook - is taken into account if apply_sel_pulses is True - - :param first_index: first index of image data - :param last_index: last index of image data - :param max_cells: number of memory cells to correct - :param allcells: array of image.cellsIds of raw data - :param allpulses: array of image.pulseIds of raw data - :param valid_indices: validated indices of image.data - :param img_selected: mask of selected cells for given - range of trains - :return firange: An array of validated image.data - indices to correct - """ - - if valid_indices is not None: - allcells = allcells[valid_indices] - allpulses = allpulses[valid_indices] - else: - allcells = allcells[first_index:last_index] - allpulses = allpulses[first_index:last_index] - - can_calibrate = (allcells < max_cells) - - if img_selected is not None: - can_calibrate &= img_selected - - if not np.any(can_calibrate): - return - - if valid_indices is None: - firange = np.arange(first_index, last_index) - else: - firange = valid_indices - firange = firange[can_calibrate] - - return firange - def copy_and_sanitize_non_cal_data(self, infile, outfile, agipd_base, idx_base, trains): """ Copy and sanitize data in `infile` that is not touched by @@ -1551,7 +1486,6 @@ class CellRange(CellSelection): return np.tile(self._sel_for_cm(self.flag, self.flag_cm, cm), len(train_sel)) - class LitFrameSelection(CellSelection): """Selection of detector memery cells indicated as lit frames by the AgipdLitFrameFinder