diff --git a/notebooks/DynamicFF/Correct_DynamicFF_NBC.ipynb b/notebooks/DynamicFF/Correct_DynamicFF_NBC.ipynb index f0b47d0adf49380cf2bd8a90b651bdc7ec10c591..74d37ede5f3123ad97b96ccada8b7cac07cd3855 100644 --- a/notebooks/DynamicFF/Correct_DynamicFF_NBC.ipynb +++ b/notebooks/DynamicFF/Correct_DynamicFF_NBC.ipynb @@ -4,11 +4,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Shimadzu HPVX2 Offline Correction\n", + "# Dynamic Flat-field Offline Correction\n", "\n", "Author: Egor Sobolev\n", "\n", - "Offline dynamic flat-field correction for Shimadzu HPVX2 cameras" + "Offline dynamic flat-field correction" ] }, { @@ -17,17 +17,17 @@ "metadata": {}, "outputs": [], "source": [ - "in_folder = \"/gpfs/exfel/exp/SPB/202121/p002919/raw/\" # input folder, required\n", + "in_folder = \"/gpfs/exfel/exp/SPB/202430/p900425/raw\" # input folder, required\n", "out_folder = '/gpfs/exfel/data/scratch/esobolev/test/shimadzu' # output folder, required\n", "metadata_folder = \"\" # Directory containing calibration_metadata.yml when run by xfel-calibrate\n", - "run = 30 # which run to read data from, required\n", + "run = 3 # which run to read data from, required\n", "\n", "# Data files parameters.\n", - "karabo_da = ['HPVX01'] # data aggregators\n", - "karabo_id = \"SPB_EHD_HPVX2_2\" # karabo prefix of Shimadzu HPV-X2 devices\n", + "karabo_da = ['HPVX01/1', 'HPVX01/2'] # data aggregators\n", + "karabo_id = \"SPB_EHD_MIC\" # karabo prefix of Shimadzu HPV-X2 devices\n", "#receiver_id = \"PNCCD_FMT-0\" # inset for receiver devices\n", "#path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data\n", - "instrument_source_template = '{}/CAM/CAMERA:daqOutput' # data source path in h5file. Template filled with karabo_id\n", + "instrument_source_template = 'SPB_EHD_MIC/CAM/HPVX2_{module}:daqOutput' # data source path in h5file.\n", "image_key = \"data.image.pixels\" # image data key in Karabo or exdf notation\n", "\n", "# Database access parameters.\n", @@ -42,11 +42,11 @@ "downsample_factors = [1, 1] # list of downsample factors for each image dimention (y, x)\n", "\n", "constants_folder = \"/gpfs/exfel/data/scratch/esobolev/test/shimadzu\"\n", - "db_module = \"SHIMADZU_HPVX2_M001\"\n", + "db_module_template = \"Shimadzu_HPVX2_{}\"\n", "\n", "num_proc = 32 # number of processes running correction in parallel\n", "\n", - "corrected_source_template = '{}/CORR/CAMERA:daqOutput' # data source path in h5file. Template filled with karabo_id" + "corrected_source_template = 'SPB_EHD_MIC/CORR/HPVX2_{module}:output' # data source path in h5file." 
] }, { @@ -59,8 +59,9 @@ "import h5py\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", + "from IPython.display import display, Markdown\n", "\n", - "from extra_data import RunDirectory\n", + "from extra_data import RunDirectory, by_id\n", "\n", "%matplotlib inline\n", "from cal_tools.step_timing import StepTimer\n", @@ -77,8 +78,29 @@ "metadata": {}, "outputs": [], "source": [ - "instrument = karabo_id.split(\"_\")[0]\n", - "source = instrument_source_template.format(karabo_id)\n", + "index_group = image_key.partition('.')[0]\n", + "instrument, part, component = karabo_id.split('_')\n", + "\n", + "aggregators = {}\n", + "sources = {}\n", + "source_to_db = {}\n", + "print(\"Sources:\")\n", + "for da in karabo_da:\n", + " aggr, _, module = da.partition('/')\n", + " instrument_source_name = instrument_source_template.format(\n", + " instrument=instrument, part=part, component=component,\n", + " module=module\n", + " )\n", + " corrected_source_name = corrected_source_template.format(\n", + " instrument=instrument, part=part, component=component,\n", + " module=module\n", + " )\n", + " aggregators.setdefault(aggr, []).append(\n", + " (instrument_source_name, corrected_source_name))\n", + " sources[instrument_source_name] = aggr\n", + " source_to_db[instrument_source_name] = db_module_template.format(module)\n", + " print('-', instrument_source_name)\n", + "print()\n", "\n", "print(f\"Detector in use is {karabo_id}\")\n", "print(f\"Instrument {instrument}\")\n", @@ -99,62 +121,42 @@ "metadata": {}, "outputs": [], "source": [ - "step_timer.start()\n", - "\n", - "# Offsets\n", - "constant_name = \"Offset\"\n", - "const_file = f\"{constants_folder}/const_{constant_name}_{db_module}.h5\"\n", - "if not os.path.isfile(const_file):\n", - " raise FileNotFoundError(f\"{constant_name} constants are not found for {karabo_id}.\")\n", - "\n", - "with h5py.File(const_file, 'r') as f:\n", - " dark_conditions = dict(\n", - " num_frames=int(f[\"condition/Memory cells/value\"][()]),\n", - " nx=int(f[\"condition/Pixels X/value\"][()]),\n", - " ny=int(f[\"condition/Pixels Y/value\"][()]),\n", - " n_components=int(f[\"condition/FF components/value\"][()]),\n", - " )\n", - " dark = f[\"data\"][:]\n", - " dark_creation_time = f[\"creation_time\"][()].decode()\n", - "\n", - "print(f\"{constant_name}: {dark_creation_time}\")\n", - "\n", - "# Flat-field components\n", - "constant_name = \"ComponentsFF\"\n", - "const_file = f\"{constants_folder}/const_{constant_name}_{db_module}.h5\"\n", - "if not os.path.isfile(const_file):\n", - " raise FileNotFoundError(f\"{constant_name} constants are not found for {karabo_id}.\")\n", - "\n", - "with h5py.File(const_file, 'r') as f:\n", - " flat_conditions = dict(\n", - " num_frames=int(f[\"condition/Memory cells/value\"][()]),\n", - " nx=int(f[\"condition/Pixels X/value\"][()]),\n", - " ny=int(f[\"condition/Pixels Y/value\"][()]),\n", - " n_components=int(f[\"condition/FF components/value\"][()]),\n", - " )\n", - " flat = f[\"data\"][:]\n", - " components = flat[1:]\n", - " flat = flat[0]\n", - " flat_creation_time = f[\"creation_time\"][()].decode()\n", - "\n", - "print(f\"{constant_name}: {dark_creation_time}\")\n", - "\n", - "if not all(flat_conditions[key] == value for key, value in dark_conditions.items()):\n", - " raise ValueError(\"Conditions for offsets and flat-field components are different\")\n", + "requested_conditions = {\n", + " \"frame_size\": 1.0,\n", + "}\n", "\n", - "conditions = type(\"Conditions\", (), flat_conditions)\n", - "\n", - "print(f\"Image 
size: {conditions.nx} x {conditions.ny} px\")\n", - "print(f\"Number of flat-field components: {conditions.n_components}\")\n", + "step_timer.start()\n", "\n", - "if conditions.n_components < n_components:\n", - " warnings.warn(\n", - " f\"The correction set to use {n_components} flat-field components, \"\n", - " f\"but constants contains only {conditions.n_components}.\"\n", - " \"The settings adjusted to the number of available components.\"\n", - " )\n", - "else:\n", - " components = components[:n_components]\n", + "corrections = {}\n", + "constant_types = [\"Offset\", \"DynamicFF\"]\n", + "for source, db_module in source_to_db.items():\n", + " constants = {}\n", + " for constant_name in constant_types:\n", + " const_file = f\"{constants_folder}/const_{constant_name}_{db_module}.h5\"\n", + " if not os.path.isfile(const_file):\n", + " raise FileNotFoundError(f\"{constant_name} constants are not found for {karabo_id}.\")\n", + "\n", + " with h5py.File(const_file, 'r') as f:\n", + " conditions = dict(\n", + " frame_size=int(f[\"condition/Frame Size/value\"][()])\n", + " )\n", + " data = f[\"data\"][:]\n", + " data_creation_time = f[\"creation_time\"][()].decode()\n", + " \n", + " if not all(conditions[key] == value for key, value in requested_conditions.items()):\n", + " raise ValueError(f\"Conditions for {constant_name} do not match\")\n", + "\n", + " print(f\"{source} {db_module} {constant_name}: {data_creation_time}\")\n", + " constants[constant_name] = data\n", + "\n", + " dark = constants[\"Offset\"]\n", + " flat = constants[\"DynamicFF\"][0]\n", + " components = constants[\"DynamicFF\"][1:][:n_components]\n", + "\n", + " dffc = DynamicFlatFieldCorrection.from_constants(\n", + " dark, flat, components, downsample_factors)\n", + "\n", + " corrections[source] = dffc\n", "\n", "step_timer.done_step(\"Load calibration constants\")" ] }, { @@ -172,121 +174,102 @@ "metadata": {}, "outputs": [], "source": [ - "step_timer.start()\n", - "dc = RunDirectory(f\"{in_folder}/r{run:04d}\")\n", - "\n", - "num_trains, num_cells = dc[source][image_key].shape[:2]\n", - "num_images = num_trains * num_cells\n", - "print(\"Number of trains:\", num_trains)\n", - "print(\"Number of images:\", num_images)\n", - "\n", - "dffc = DynamicFlatFieldCorrection.from_constants(\n", - " dark, flat, components, downsample_factors)\n", - "\n", - "proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, source, image_key)\n", - "\n", - "proc.start_workers()\n", - "proc.run(dc)\n", - "proc.join_workers()\n", - "\n", - "train_ids = proc.rdr.trains\n", - "corrected_images = np.stack(proc.rdr.results, 0)\n", - "step_timer.done_step(\"Correct images\")" + "report = []\n", + "for aggr, sources in aggregators.items():\n", + " dc = RunDirectory(f\"{in_folder}/r{run:04d}\", f\"RAW-R{run:04d}-{aggr}-S*.h5\")\n", + "\n", + " train_ids = set()\n", + " keydata_cache = {}\n", + " for instrument_source, corrected_source in sources:\n", + " keydata = dc[instrument_source][image_key].drop_empty_trains()\n", + " train_ids.update(keydata.train_ids)\n", + " keydata_cache[instrument_source] = keydata\n", + " train_ids = np.array(sorted(train_ids))\n", + " ts = dc.select_trains(by_id[train_ids]).train_timestamps().astype(np.uint64)\n", + "\n", + " for seq_id, train_mask in sequence_trains(train_ids, 200):\n", + " step_timer.start()\n", + " print('* sequence', seq_id)\n", + " seq_train_ids = train_ids[train_mask]\n", + " seq_timestamps = ts[train_mask]\n", + " dc_seq = dc.select_trains(by_id[seq_train_ids])\n", + " ntrains = len(seq_train_ids)\n", + 
"\n", + " # create output file\n", + " channels = [f\"{s[1]}/{index_group}\" for s in sources]\n", + "\n", + " f = DataFile.from_details(out_folder, aggr, run, seq_id)\n", + " f.create_metadata(like=dc, instrument_channels=channels)\n", + " f.create_index(seq_train_ids, timestamps=seq_timestamps)\n", + "\n", + " seq_report = {}\n", + " image_datasets = {}\n", + " for instrument_source, corrected_source in sources:\n", + " keydata = dc_seq[instrument_source][image_key].drop_empty_trains()\n", + " count = keydata.data_counts()\n", + " i = np.flatnonzero(count.values)\n", + " raw_images = keydata.select_trains(np.s_[i]).ndarray()\n", + "\n", + " # not pulse resolved\n", + " shape = keydata.shape\n", + " count = np.in1d(seq_train_ids, keydata.train_ids).astype(int)\n", + "\n", + " src = f.create_instrument_source(corrected_source)\n", + " src.create_index(index_group=count)\n", + "\n", + " ds_data = src.create_key(image_key, shape=shape, dtype=np.float32)\n", + " image_datasets[corrected_source] = ds_data\n", + "\n", + " step_timer.done_step(\"Create output file\")\n", + "\n", + " for instrument_source, corrected_source in sources:\n", + " step_timer.start()\n", + " dc_seq = dc.select_trains(by_id[seq_train_ids])\n", + "\n", + " dffc = corrections[instrument_source]\n", + " proc = FlatFieldCorrectionFileProcessor(dffc, num_proc, instrument_source, image_key)\n", + "\n", + " proc.start_workers()\n", + " proc.run(dc_seq)\n", + " proc.join_workers()\n", + "\n", + " # not pulse resolved\n", + " corrected_images = np.stack(proc.rdr.results, 0)\n", + " image_datasets[corrected_source][:] = corrected_images\n", + "\n", + " seq_report[instrument_source] = (raw_images[0, 0], corrected_images[:20, 0])\n", + " step_timer.done_step(\"Correct flat-field\")\n", + "\n", + " f.close()\n", + " report.append(seq_report)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": false + }, "outputs": [], "source": [ "step_timer.start()\n", "\n", - "corr_source = corrected_source_template.format(karabo_id)\n", - "channel = image_key.partition('.')[0]\n", - "data_source_id = corr_source + '/' + channel\n", - "\n", - "ts = dc.train_timestamps().astype(np.uint64)\n", - "ts = ts[np.in1d(dc.train_ids, train_ids)]\n", - "\n", - "for seq_id, train_mask in sequence_trains(train_ids):\n", - " seq_train_ids = train_ids[train_mask]\n", - " seq_timestamps = ts[train_mask]\n", - " ntrains = len(seq_train_ids)\n", - " \n", - " f = DataFile.from_details(out_folder, karabo_da[0], run, seq_id)\n", - " src = f.create_instrument_source(corr_source)\n", - " \n", - " f.create_metadata(like=dc, instrument_channels=(data_source_id,))\n", - " f.create_index(seq_train_ids, timestamps=seq_timestamps)\n", - " \n", - " channels = {\n", - " image_key.partition('.')[0]: np.ones(ntrains, int)\n", - " }\n", - " src.create_index(**channels)\n", - " src.create_key(image_key, corrected_images[train_mask])\n", - "\n", - " f.close()\n", - " \n", - "step_timer.done_step(\"Save corrected images\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The first raw image" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "step_timer.start()\n", + "for source, (raw_image, corrected_images) in report[0].items():\n", + " display(Markdown(f\"# {source}\"))\n", "\n", - "counts = dc[source][image_key].data_counts()\n", - "i = np.flatnonzero(counts.values)\n", + " display(Markdown(\"## The first raw image\"))\n", + " 
plot_camera_image(raw_image)\n", + " plt.show()\n", "\n", - "raw_images = dc[source][image_key].select_trains(np.s_[i]).ndarray()\n", - "plot_camera_image(raw_images[0, 0])\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The first corrected image" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_camera_image(corrected_images[0, 0])\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The first corrected images in the trains (up to 20)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_images(corrected_images[:20, 0], figsize=(13, 8))\n", - "plt.show()\n", - "step_timer.done_step(\"Draw examples of corrected images\")" + " display(Markdown(\"## The first corrected image\"))\n", + " plot_camera_image(corrected_images[0])\n", + " plt.show()\n", + "\n", + " display(Markdown(\"## The first corrected image in each train (up to 20 trains)\"))\n", + " plot_images(corrected_images, figsize=(13, 8))\n", + " plt.show()\n", + "\n", + "step_timer.done_step(\"Draw images\")" ] }, {