# Input parameters
# NOTE(review): this cell looks like it is parsed by xfel-calibrate (the
# trailing comments read like help text, with markers such as "required" and
# "range allowed") - keep it to simple assignments with trailing comments.
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/"  # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/schmidtp/random/LPD_test"  # the folder to output to, required
metadata_folder = ''  # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [0]  # sequences to correct, set to empty for all, range allowed
# NOTE(review): the environment cell only treats an empty value as "all
# modules"; a literal -1 is not handled there - confirm it is translated
# before the notebook runs.
modules = ''  # modules to correct, set to -1 for all, range allowed, used only when karabo_da is empty
run = 10  # runs to process, required

karabo_id = "FXE_DET_LPD1M-1"  # Karabo detector identifier
karabo_da = ''  # a list of data aggregators names, Default empty string for selecting all data aggregators
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf'  # Input fast data source.
output_source = ''  # Output fast data source, empty to use same as input

# CalCat parameters
use_dir_creation_date = True  # use the creation date of the directory for database time derivation
cal_db_root = '/gpfs/exfel/d/cal/caldb_store'  # Root folder the constant file paths are resolved against.

# Operating conditions
mem_cells = 512  # Memory cells.
bias_voltage = 250.0  # Detector bias voltage.
capacitor = '5pF'  # Capacitor setting: 5pF or 50pF
photon_energy = 9.2  # Photon energy in keV.
category = 0  # Category condition used when querying illuminated constants.

# Correction parameters.
offset_corr = True  # Offset correction.
rel_gain = True  # Gain correction based on RelativeGain constant.
ff_map = True  # Gain correction based on FFMap constant.
gain_amp_map = True  # Gain correction based on GainAmpMap constant.

# Output options
overwrite = True  # set to True if existing data should be overwritten

sequences_per_node = 1  # sequence files to process per node

def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    """Forward all arguments to xfel_calibrate.calibrate.balance_sequences.

    The notebook registry in xfel_calibrate/notebooks.py refers to this
    function by name ("use function": "balance_sequences") for the
    `sequences` concurrency parameter.

    All five arguments are passed through unchanged and the result of the
    wrapped function is returned as is.
    """
    # Imported at call time, so merely executing this cell does not require
    # xfel_calibrate to be importable.
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
# Matches the stem of a raw file name and captures the run number, the data
# aggregator name and the sequence number.
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$')  # This should probably move to cal_tools

out_folder = Path(out_folder)
# parents=True: a fresh output location may not have its parent folders yet.
out_folder.mkdir(parents=True, exist_ok=True)

output_source = output_source or input_source

cal_db_root = Path(cal_db_root)

metadata = CalibrationMetadata(metadata_folder or out_folder)

if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
else:
    from datetime import datetime
    creation_time = datetime.now()

print(f'Using {creation_time.isoformat()} as creation time')

# Pick all modules/aggregators or those selected.
if not karabo_da:
    # The parameter documentation advertises -1 for "all modules", so accept
    # it (as a scalar or as a list entry) in addition to an empty value.
    if not modules or modules == -1 or -1 in list(modules):
        modules = list(range(16))

    karabo_da = [f'LPD{i:02d}' for i in modules]

# Pick all sequences or those selected.
# -1 is accepted as "all" as well: it is the default concurrency value for
# this parameter in xfel_calibrate/notebooks.py and can never match a real
# sequence number.
selected_sequences = frozenset(int(x) for x in sequences) if sequences else frozenset()

if not selected_sequences or -1 in selected_sequences:
    def do_sequence(seq):
        """Accept every sequence."""
        return True
else:
    do_sequence = selected_sequences.__contains__

# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]

# Open the entire input directory.
dc_all = xd.RunDirectory(Path(in_folder) / f'r{run:04d}') \
    .select([(src, 'image.*') for src in det_inp_sources])

# Tuples of (raw file name, aggregator name, output path), one per file to correct.
data_to_process = []

for file_access in dc_all.files:
    match = file_re.match(Path(file_access.filename).stem)

    if match is None:
        # Indexing a failed match would raise a TypeError and abort the
        # whole notebook because of a single unexpected file name.
        warnings.warn(f'Skipping {file_access.filename}: unexpected file name')
        continue

    run_no, aggregator, seq_no = int(match[1]), match[2], int(match[3])

    if aggregator not in karabo_da or not do_sequence(seq_no):
        continue

    outp_path = out_folder / 'CORR-R{run:04d}-{aggregator}-S{seq:05d}.h5'.format(
        run=run_no, aggregator=aggregator, seq=seq_no)

    data_to_process.append((file_access.filename, aggregator, outp_path))

print('Files to process:')
for filename, aggregator, _ in sorted(data_to_process, key=lambda x: (x[1], x[0])):
    print('\t'.join((aggregator, filename)))

# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
    client_id=calcat_config['user-id'],
    client_secret=calcat_config['user-secret'],
    user_email=calcat_config['user-email'],
    base_api_url=calcat_config['base-api-url'],
    token_url=calcat_config['token-url'],
    refresh_url=calcat_config['refresh-url'],
    auth_url=calcat_config['auth-url'],
    scope='')
# CalCat calibration IDs mapped to constant names.
dark_calibrations = {
    1: 'Offset',  # np.float32
    14: 'BadPixelsDark'  # should be np.uint32, but is np.float64
}

dark_condition = [
    dict(parameter_id=1, value=bias_voltage),  # Sensor bias voltage
    dict(parameter_id=7, value=mem_cells),  # Memory cells
    dict(parameter_id=15, value=capacitor),  # Feedback capacitor
    dict(parameter_id=13, value=256),  # Pixels X
    dict(parameter_id=14, value=256),  # Pixels Y
]

illuminated_calibrations = {
    20: 'BadPixelsFF',  # np.uint32
    42: 'GainAmpMap',  # np.float32
    43: 'FFMap',  # np.float32
    44: 'RelativeGain'  # np.float32
}

illuminated_condition = dark_condition.copy()
illuminated_condition += [
    dict(parameter_id=3, value=photon_energy),  # Source energy
    dict(parameter_id=25, value=category)  # category
]

# Shape shared by every constant array handled below.
const_shape = (256, 256, mem_cells, 3)

# Maps (aggregator, constant name) to the file location of the constant and
# the buffer it is loaded into.
const_data = {}
const_load_mp = psh.ProcessContext(num_workers=24)

print('Querying calibration database', end='', flush=True)
start = perf_counter()
for calibrations, condition in [
    (dark_calibrations, dark_condition),
    (illuminated_calibrations, illuminated_condition)
]:
    # Query for the configured detector at the creation time determined
    # earlier, rather than for a hard-coded detector at the default time.
    resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
        client, karabo_id, list(calibrations.keys()),
        {'parameters_conditions_attributes': condition},
        karabo_da='', event_at=creation_time.isoformat(), snapshot_at=None)

    if not resp['success']:
        raise RuntimeError(resp)

    for ccv in resp['data']:
        cc = ccv['calibration_constant']
        da = ccv['physical_detector_unit']['karabo_da']

        if da not in karabo_da:
            # Only the selected aggregators are ever prepared below, so do
            # not allocate and load buffers for the other modules.
            continue

        calibration_name = calibrations[cc['calibration_id']]

        dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32

        const_data[(da, calibration_name)] = dict(
            path=Path(ccv['path_to_file']) / ccv['file_name'],
            dataset=ccv['data_set_name'],
            data=const_load_mp.alloc(shape=const_shape, dtype=dtype)
        )
    print('.', end='', flush=True)

total_time = perf_counter() - start
print(f'{total_time:.1f}s')


def load_constant_dataset(wid, index, const_descr):
    """Read one constant from its HDF5 file into its pre-allocated buffer.

    Map kernel for `const_load_mp`.

    Args:
        wid: Worker ID (unused).
        index: Position of `const_descr` in the mapped list (unused).
        const_descr: Key into `const_data`, an (aggregator, constant name)
            tuple.
    """
    ccv_entry = const_data[const_descr]

    with h5py.File(cal_db_root / ccv_entry['path'], 'r') as fp:
        # read_direct fills the existing buffer in place, so the data does
        # not have to be returned from the worker.
        fp[ccv_entry['dataset'] + '/data'].read_direct(ccv_entry['data'])

    print('.', end='', flush=True)


print('Loading calibration data', end='', flush=True)
start = perf_counter()
const_load_mp.map(load_constant_dataset, list(const_data))
total_time = perf_counter() - start

print(f'{total_time:.1f}s')

# Final constants per aggregator, as consumed by the correction step.
ccv_offsets = {}
ccv_gains = {}
ccv_masks = {}


def prepare_constants(wid, index, aggregator):
    """Combine the loaded constants of one aggregator into final arrays.

    Fills `ccv_offsets`, `ccv_gains` and `ccv_masks` for `aggregator`.
    Missing or disabled constants fall back to neutral values: zero offset,
    unit gain and an empty mask.

    Args:
        wid: Worker ID (unused).
        index: Position of `aggregator` in the mapped list (unused).
        aggregator: Data aggregator name, e.g. one entry of `karabo_da`.
    """
    consts = {name: entry['data']
              for (da, name), entry in const_data.items()
              if da == aggregator}

    # astype() copies, so the results stay valid after const_data is cleared.
    if offset_corr and 'Offset' in consts:
        offsets = consts['Offset'].astype(np.float32)
    else:
        offsets = np.zeros(const_shape, dtype=np.float32)

    # Bad pixel masks are always applied, there is no switch for them.
    if 'BadPixelsDark' in consts:
        masks = consts['BadPixelsDark'].astype(np.uint32)
    else:
        masks = np.zeros(const_shape, dtype=np.uint32)

    if 'BadPixelsFF' in consts:
        np.bitwise_or(masks, consts['BadPixelsFF'], out=masks)

    # All enabled gain constants are multiplied into a single gain map.
    gains = np.ones(const_shape, dtype=np.float32)
    for enabled, name in ((rel_gain, 'RelativeGain'),
                          (ff_map, 'FFMap'),
                          (gain_amp_map, 'GainAmpMap')):
        if enabled and name in consts:
            gains *= consts[name]

    ccv_offsets[aggregator] = offsets
    ccv_gains[aggregator] = gains
    ccv_masks[aggregator] = masks

    print('.', end='', flush=True)


print('Preparing constants', end='', flush=True)
start = perf_counter()
psh.ThreadContext(num_workers=len(karabo_da)).map(prepare_constants, karabo_da)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')

const_data.clear()  # Clear raw constants data now to save memory.
gc.collect();
def correct_file(wid, index, work):
    """Correct one raw LPD file and write the corrected output file.

    Map kernel for the process pool below. Prints one tab-separated row of
    timings per processed file, or a short note when the file is skipped.

    Args:
        wid: Worker ID, only used in the printed report.
        index: Position of `work` in `data_to_process` (unused).
        work: Tuple of (raw file name, aggregator name, output path).
    """
    filename, aggregator, outp_path = work
    module_index = int(aggregator[-2:])

    # Check the cheap skip conditions before reading and correcting any
    # data, so that no work is done whose result would be thrown away.
    if outp_path.exists() and not overwrite:
        print(f'{wid}\t{aggregator}\tskipped, {outp_path.name} exists')
        return

    start = perf_counter()
    dc = xd.H5File(filename).select('*', 'image.*', require_all=True)
    inp_source = dc[input_source.format(karabo_id=karabo_id, module_index=module_index)]
    image_counts = inp_source['image.data'].data_counts(labelled=False)
    open_time = perf_counter() - start

    if image_counts.sum() <= 0:
        print(f'{wid}\t{aggregator}\tskipped, no image data')
        return

    # Load raw data for this file.
    # Explicit reshapes instead of a bare squeeze(): squeeze() would also
    # drop the frame axis of a file holding exactly one frame.
    start = perf_counter()
    in_data = inp_source['image.data'].ndarray().reshape(-1, 256, 256)
    in_cell = inp_source['image.cellId'].ndarray().reshape(-1)
    in_pulse = inp_source['image.pulseId'].ndarray().reshape(-1)
    read_time = perf_counter() - start

    num_frames = in_data.shape[0]

    # Allocate output arrays.
    out_pixels = np.zeros((num_frames, 256, 256), dtype=np.float32)
    out_gain = np.zeros((num_frames, 256, 256), dtype=np.uint8)
    out_mask = np.zeros((num_frames, 256, 256), dtype=np.uint32)

    start = perf_counter()
    correct_lpd_frames(in_data, in_cell,
                       out_pixels, out_gain, out_mask,
                       ccv_offsets[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
                       num_threads=num_threads_per_worker)
    correct_time = perf_counter() - start

    start = perf_counter()
    fa = dc.files[0]
    # Restrict the per-file index entries to the trains in the selection.
    sel_trains = np.isin(fa.train_ids, dc.train_ids)

    outp_source_name = output_source.format(karabo_id=karabo_id, module_index=module_index)

    # NOTE(review): presumably widens the source names DataFile accepts so
    # that the output source name passes - confirm against cal_tools.files.
    DataFile.instrument_source_pattern = re.compile(r'^[\w\/-]+:\w+$')
    with DataFile(outp_path, 'w') as outp_file:
        outp_file.create_index(
            train_ids=dc.train_ids,
            timestamp=fa.file['INDEX/timestamp'][sel_trains],
            flag=fa.validity_flag[sel_trains])

        outp_source = outp_file.create_instrument_source(outp_source_name)

        outp_source.create_index(image=image_counts)
        outp_source.create_key('image.data', data=out_pixels)
        outp_source.create_key('image.cellId', data=in_cell)
        outp_source.create_key('image.pulseId', data=in_pulse)
        write_compressed_frames(
            out_gain, outp_file, f'INSTRUMENT/{outp_source_name}/image/gain', comp_threads=8)
        write_compressed_frames(
            out_mask, outp_file, f'INSTRUMENT/{outp_source_name}/image/mask', comp_threads=8)

        outp_file.create_metadata(like=dc)
    write_time = perf_counter() - start

    total_time = open_time + read_time + correct_time + write_time
    frame_rate = num_frames / total_time

    print('{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}\t{:.1f}'.format(
        wid, aggregator, open_time, read_time, correct_time, write_time, total_time,
        num_frames, frame_rate))

    # Drop the large arrays before the worker picks up its next file.
    del in_data, in_cell, in_pulse, out_pixels, out_gain, out_mask
    gc.collect()

# NOTE(review): the two lines below look like benchmark notes from
# development; the configuration they refer to is not recorded.
# 8/18, 479s
# 10/18, 464s

num_workers = 5
num_threads_per_worker = 24
print('worker\tDA\topen\tread\tcorrect\twrite\ttotal\tframes\trate')

start = perf_counter()
psh.ProcessContext(num_workers=num_workers).map(correct_file, data_to_process)
total_time = perf_counter() - start

print(f'Total time: {total_time:.1f}s')
def find_nearest_index(array, value):
    """Return the index of the element of `array` closest to `value`.

    Args:
        array: Array-like of numbers; lists and tuples are accepted as well
            as numpy arrays.
        value: Scalar to search for.

    Returns:
        Index into the flattened array of the element with the smallest
        absolute difference to `value`; the first one on ties.

    Raises:
        ValueError: If `array` is empty.
    """
    # asarray is a no-op for arrays and lets plain sequences work too.
    return np.abs(np.asarray(array) - value).argmin()
# Preview plots, one per markdown heading of the notebook. All of them reuse
# the colormap interval (vmin, vmax) derived in the histogram cell.

# "First memory cell".
# NOTE(review): assumes `data` is laid out as (module, train, cell, y, x),
# so [:, 0, 0] is the first cell of the first train - TODO confirm.
fig, ax = plt.subplots(num=2, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax);

# "Train average": mean over axis 1 of data[:, 0], which under the same
# layout assumption is the memory cell axis.
fig, ax = plt.subplots(num=3, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax);

# "Lowest gain stage per pixel": despite the variable name, this takes the
# largest gain value over axes 1 and 2, and the tick labels below map the
# largest value (2) to 'Low gain'.
highest_gain_stage = det.get_array('image.gain', pulses=np.s_[:]).max(axis=(1, 2))

fig, ax = plt.subplots(num=4, figsize=(15, 15), clear=True, nrows=1, ncols=1)
p = geom.plot_data_fast(highest_gain_stage, ax=ax, vmin=0, vmax=2);

# Label the three gain values on the colorbar of the image just drawn.
cb = ax.images[0].colorbar
cb.set_ticks([0, 1, 2])
cb.set_ticklabels(['High gain', 'Medium gain', 'Low gain'])
b/src/xfel_calibrate/notebooks.py index 299722f1da4069fc15933f42eeb1774fc58a413c..a26c4afdf8ea3da72a9de9300f0796b016ad517a 100644 --- a/src/xfel_calibrate/notebooks.py +++ b/src/xfel_calibrate/notebooks.py @@ -85,11 +85,11 @@ notebooks = { "cluster cores": 8}, }, "CORRECT": { - "notebook": "notebooks/LPD/LPD_Correct_and_Verify.ipynb", + "notebook": "notebooks/LPD/LPD_Correct_Fast.ipynb", "concurrency": {"parameter": "sequences", "default concurrency": [-1], "use function": "balance_sequences", - "cluster cores": 32}, + "cluster cores": 16}, }, "XGM_MINE": { "notebook": "notebooks/LPD/Mine_RadIntensity_vs_XGM_NBC.ipynb",