diff --git a/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb b/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
index cffad2c30d8dc3e87d418139a09ab16b54585a3b..5b715e5972993cc2b8cca21bdafcefe4ad930ed8 100644
--- a/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
+++ b/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
@@ -32,7 +32,7 @@
     "karabo_da_control = \"PNCCD02\" # file inset for control data\n",
     "karabo_id = \"SQS_NQS_PNCCD1MP\" # karabo prefix of PNCCD devices\n",
     "receiver_id = \"PNCCD_FMT-0\" # inset for receiver devices\n",
-    "path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data\n",
+    "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
     "path_template_seqs = \"{}/r{:04d}/*PNCCD01-S*.h5\"\n",
     "h5path = '{}/CAL/{}:output' # path to data in the HDF5 file \n",
     "h5path_ctrl = '{}/CTRL/TCTRL'\n",
@@ -41,7 +41,6 @@
     "use_dir_creation_date = True # To obtain creation time of the run\n",
     "number_dark_frames = 0 # number of images to be used, if set to 0 all available images are used\n",
     "chunk_size_idim = 1 # H5 chunking size of output data\n",
-    "cluster_profile = \"noDB\" # ipcluster profile to use\n",
     "\n",
     "cpuCores = 40\n",
     "commonModeBlockSize = [512, 512] # size of the detector in pixels for common mode calculations\n",
@@ -50,7 +49,6 @@
     "split_evt_secondary_threshold = 3. # secondary threshold for split event classification in terms of n sigma noise\n",
     "sequences_per_node = 1\n",
     "limit_images = 0\n",
-    "seq_num = 0 # sequence number for which the last plot at the end of the notebook is plotted\n",
     "\n",
     "# pnCCD parameters:\n",
     "fix_temperature_top = 0. # fix temperature for top sensor in K, set to 0. to use value from slow data.\n",
@@ -68,7 +66,7 @@
     "common_mode = True # Apply common mode correction\n",
     "relgain = True # Apply relative gain correction\n",
     "cti = False # Apply charge transfer inefficiency correction (not implemented, yet)\n",
-    "do_pattern_classification = True # classify split events\n",
+    "pattern_classification = True # classify split events\n",
     "\n",
     "saturated_threshold = 32000. # full well capacity in ADU\n",
     "\n",
@@ -94,7 +92,7 @@
     "    corr_bools[\"relgain\"] = relgain\n",
     "    corr_bools[\"cti\"] = cti\n",
     "    corr_bools[\"common_mode\"] = common_mode\n",
-    "    corr_bools[\"pattern_class\"] = do_pattern_classification"
+    "    corr_bools[\"pattern_class\"] = pattern_classification"
    ]
   },
   {
@@ -115,14 +113,14 @@
    "source": [
     "import time\n",
     "import traceback\n",
     "import warnings\n",
     "from datetime import timedelta\n",
     "from typing import Tuple\n",
-    "\n",
     "warnings.filterwarnings('ignore')\n",
     "\n",
     "import h5py\n",
-    "from extra_data import H5File\n",
     "import matplotlib.pyplot as plt\n",
+    "import pasha as psh\n",
+    "from extra_data import H5File, RunDirectory\n",
     "from iminuit import Minuit\n",
     "from IPython.display import Markdown, display\n",
     "\n",
@@ -136,15 +135,15 @@
     "    get_random_db_interface,\n",
     "    map_modules_from_folder,\n",
     ")\n",
+    "from cal_tools.step_timing import StepTimer\n",
+    "from cal_tools import h5_copy_except\n",
     "from iCalibrationDB import Conditions, ConstantMetaData, Constants, Detectors, Versions\n",
     "from iCalibrationDB.detectors import DetectorTypes\n",
     "from prettytable import PrettyTable\n",
     "\n",
     "profiler = xprof.Profiler()\n",
     "profiler.disable()\n",
-    "from XFELDetAna.util import env\n",
     "\n",
-    "env.iprofile = cluster_profile\n",
     "from XFELDetAna import xfelpyanatools as xana\n",
     "from XFELDetAna import xfelpycaltools as xcal\n",
     "from XFELDetAna.plotting.util import prettyPlotting\n",
@@ -175,8 +174,6 @@
     "\n",
     "# Paths to the data:\n",
     "ped_dir = \"{}/r{:04d}\".format(in_folder, run)\n",
-    "fp_name = path_template.format(run, karabo_da)\n",
-    "fp_path = '{}/{}'.format(ped_dir, fp_name)\n",
     "h5path = h5path.format(karabo_id, receiver_id)\n",
     "print(\"HDF5 path to data: {}\\n\".format(h5path))\n",
     "\n",
@@ -208,39 +205,16 @@
     "mapped_files, _, total_sequences, _, _ = map_modules_from_folder(\n",
     "    in_folder=in_folder,\n",
     "    run=run,\n",
-    "    path_template='RAW-R{:04d}-{}-S{:05d}.h5',\n",
-    "    karabo_da=karabo_da,\n",
+    "    path_template=path_template,\n",
+    "    karabo_da=[karabo_da],\n",
     "    sequences=sequences,\n",
     "    qm_naming=False,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Reading all sequences of the run:\n",
+    ")\n",
     "file_list = []\n",
-    "total_sequences = 0\n",
-    "fsequences = []\n",
-    "\n",
-    "if sequences is None:\n",
-    "    file_list = glob.glob(fp_path.format(0).replace('00000', '*'))\n",
-    "    file_list = sorted(file_list, key = lambda x: (len (x), x))\n",
-    "    total_sequences = len(file_list)\n",
-    "    fsequences = range(total_sequences)\n",
-    "else:\n",
-    "    for seq in sequences:\n",
-    "        abs_entry = fp_path.format(seq)\n",
-    "        if os.path.isfile(abs_entry):\n",
-    "            file_list.append(abs_entry)\n",
-    "            total_sequences += 1\n",
-    "            fsequences.append(seq)\n",
-    "\n",
-    "sequences = fsequences \n",
-    "print(f\"This run has a total number of {total_sequences} sequences.\\n\")"
+    "print(f\"Processing a total of {total_sequences} sequence files:\")\n",
+    "for f in mapped_files[karabo_da].queue:\n",
+    "    file_list.append(f)\n",
+    "    print(f)"
    ]
   },
   {
@@ -251,25 +225,24 @@
    "source": [
     "# extract slow data\n",
     "if karabo_da_control:\n",
-    "    ctrl_fname = os.path.join(ped_dir, path_template.format(run, karabo_da_control)).format(sequences[0])\n",
+    "    mdl_path = karabo_id + \"/MDL/{}\"\n",
     "    ctrl_path = h5path_ctrl.format(karabo_id)\n",
-    "    mdl_ctrl_path = f\"/CONTROL/{karabo_id}/MDL/\"\n",
-    "\n",
-    "    (bias_voltage, gain,\n",
-    "     fix_temperature_top,\n",
-    "     fix_temperature_bot) = extract_slow_data(karabo_id, karabo_da_control, ctrl_fname, ctrl_path,\n",
-    "                                               mdl_ctrl_path, bias_voltage, gain,\n",
-    "                                               fix_temperature_top, fix_temperature_bot)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "    ctrl_dc = RunDirectory(ped_dir).select(\n",
+    "        [\n",
+    "            [ctrl_path, \"*\"],\n",
+    "            [mdl_path.format(\"*\"), \"*\"],]\n",
+    "    )\n",
+    "\n",
+    "    bias_voltage, gain, fix_temperature_top, fix_temperature_bot = extract_slow_data(  # noqa\n",
+    "        ctrl_dc,\n",
+    "        ctrl_path,\n",
+    "        mdl_path,\n",
+    "        bias_voltage,\n",
+    "        gain,\n",
+    "        fix_temperature_top,\n",
+    "        fix_temperature_bot,\n",
+    "    )\n",
     "# Printing the Parameters Read from the Data File:\n",
-    "\n",
     "display(Markdown('### Detector Parameters'))\n",
     "print(f\"Bias voltage is {bias_voltage:0.1f} V.\")\n",
     "print(f\"Detector gain is set to 1/{int(gain)}.\")\n",
@@ -360,18 +333,6 @@
     "b_range = Event_Bin_Dict[\"b_range\"]"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "display(Markdown('### List of files to be Processed'))\n",
-    "print(\"Reading data from the following files: \")\n",
-    "for i in range(len(file_list)):\n",
-    "    print(file_list[i])"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -386,6 +347,7 @@
    "# Sensor size and block size definitions (important for common mode and other corrections):\n",
    "\n",
    "sensorSize = [pixels_x, pixels_y]\n",
+   "run_parallel = False\n",
    "blockSize = [sensorSize[0]//2, sensorSize[1]//2] # sensor area will be analysed according to blockSize\n",
    "xcal.defaultBlockSize = blockSize # for xcal.HistogramCalculators \n",
    "memoryCells = 1 # pnCCD has 1 memory cell"
@@ -468,9 +430,16 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Constants retrieval"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "display(Markdown('### Dark Data Retrieval'))\n",
@@ -550,11 +519,10 @@
     "if corr_bools.get('common_mode'):\n",
     "    # Common Mode Correction Calculator:\n",
     "    cmCorrection = xcal.CommonModeCorrection([pixels_x, pixels_y],\n",
-    "                                             commonModeBlockSize,\n",
+    "                                             blockSize,\n",
     "                                             commonModeAxis,\n",
-    "                                             parallel=False, dType=np.float32, stride=1,\n",
+    "                                             parallel=run_parallel, dType=np.float32, stride=1,\n",
     "                                             noiseMap=constants[\"Noise\"].astype(np.float32), minFrac=0.25)\n",
-    "    cmCorrection.debug()\n",
     "\n",
     "if corr_bools.get('pattern_class'):\n",
     "    # Pattern Classifier Calculator:\n",
@@ -569,7 +537,7 @@
     "                                                 cores=cpuCores, # XFELDetAna/algorithms)\n",
     "                                                 allowElongated=False,\n",
     "                                                 blockSize=[pixels_x, pixels_y//2],\n",
-    "                                                 runParallel=True)\n",
+    "                                                 parallel=run_parallel)\n",
     "\n",
     "    # Right Hemisphere:\n",
     "    patternClassifierRH = xcal.PatternClassifier([pixels_x, pixels_y//2],\n",
@@ -582,13 +550,13 @@
     "                                                 cores=cpuCores,\n",
     "                                                 allowElongated=False,\n",
     "                                                 blockSize=[pixels_x, pixels_y//2],\n",
-    "                                                 runParallel=True)\n",
+    "                                                 parallel=run_parallel)\n",
     "\n",
-    "    patternClassifierLH._imagesPerChunk = 500\n",
-    "    patternClassifierRH._imagesPerChunk = 500\n",
-    "    patternClassifierLH.debug()\n",
-    "    patternClassifierRH.debug()\n",
+    "    patternClassifierLH._imagesPerChunk = 1\n",
+    "    patternClassifierRH._imagesPerChunk = 1\n",
     "\n",
+    "    patternClassifierLH._noisemap = constants[\"Noise\"][:, :pixels_x//2]\n",
+    "    patternClassifierRH._noisemap = constants[\"Noise\"][:, pixels_x//2:]\n",
     "    # Setting bad pixels:\n",
     "    patternClassifierLH.setBadPixelMask(constants[\"BadPixelsDark\"][:, :pixels_y//2] != 0)\n",
     "    patternClassifierRH.setBadPixelMask(constants[\"BadPixelsDark\"][:, pixels_y//2:] != 0)"
    ]
   },
   {
@@ -607,64 +575,137 @@
    "source": [
     "#***************** Histogram Calculators ******************#\n",
     "# Will contain uncorrected data:\n",
-    "histCalRaw = xcal.HistogramCalculator(sensorSize, \n",
-    "                                      bins=bins, \n",
-    "                                      range=bin_range,\n",
-    "                                      nCells=memoryCells, \n",
-    "                                      cores=cpuCores,\n",
-    "                                      blockSize=blockSize) \n",
-    "histCalRaw.debug()\n",
+    "histCalRaw = xcal.HistogramCalculator(\n",
+    "    sensorSize, \n",
+    "    bins=bins, \n",
+    "    range=bin_range,\n",
+    "    nCells=memoryCells,\n",
+    "    cores=cpuCores,\n",
+    "    blockSize=blockSize,\n",
+    "    parallel=run_parallel,\n",
+    ")\n",
     "# Will contain offset corrected data:\n",
-    "histCalOffsetCor = xcal.HistogramCalculator(sensorSize, \n",
-    "                                            bins=bins, \n",
-    "                                            range=bin_range,\n",
-    "                                            nCells=memoryCells, \n",
-    "                                            cores=cpuCores,\n",
-    "                                            blockSize=blockSize)\n",
-    "histCalOffsetCor.debug()\n",
+    "histCalOffsetCor = xcal.HistogramCalculator(\n",
+    "    sensorSize, \n",
+    "    bins=bins, \n",
+    "    range=bin_range,\n",
+    "    nCells=memoryCells, \n",
+    "    cores=cpuCores,\n",
+    "    blockSize=blockSize,\n",
+    "    parallel=run_parallel,\n",
+    ")\n",
     "if corr_bools.get('common_mode'):\n",
     "    # Will contain common mode corrected data:\n",
-    "    histCalCommonModeCor = xcal.HistogramCalculator(sensorSize, \n",
-    "                                                    bins=bins, \n",
-    "                                                    range=bin_range,\n",
-    "                                                    nCells=memoryCells, \n",
-    "                                                    cores=cpuCores,\n",
-    "                                                    blockSize=blockSize)\n",
-    "    histCalCommonModeCor.debug()\n",
+    "    histCalCommonModeCor = xcal.HistogramCalculator(\n",
+    "        sensorSize,\n",
+    "        bins=bins,\n",
+    "        range=bin_range,\n",
+    "        nCells=memoryCells,\n",
+    "        cores=cpuCores,\n",
+    "        blockSize=blockSize,\n",
+    "        parallel=run_parallel,\n",
+    "    )\n",
     "    \n",
     "if corr_bools.get('pattern_class'):\n",
     "    # Will contain split events pattern data:\n",
-    "    histCalPcorr = xcal.HistogramCalculator(sensorSize, \n",
-    "                                            bins=bins, \n",
-    "                                            range=bin_range,\n",
-    "                                            nCells=memoryCells, \n",
-    "                                            cores=cpuCores,\n",
-    "                                            blockSize=blockSize)\n",
-    "    histCalPcorr.debug()\n",
+    "    histCalPcorr = xcal.HistogramCalculator(\n",
+    "        sensorSize,\n",
+    "        bins=bins,\n",
+    "        range=bin_range,\n",
+    "        nCells=memoryCells, \n",
+    "        cores=cpuCores,\n",
+    "        blockSize=blockSize,\n",
+    "        parallel=run_parallel,\n",
+    "    )\n",
     "    # Will contain singles events data:\n",
-    "    histCalPcorrS = xcal.HistogramCalculator(sensorSize, \n",
-    "                                             bins=bins, \n",
-    "                                             range=bin_range,\n",
-    "                                             nCells=memoryCells, \n",
-    "                                             cores=cpuCores,\n",
-    "                                             blockSize=blockSize)\n",
-    "    histCalPcorrS.debug()\n",
+    "    histCalPcorrS = xcal.HistogramCalculator(\n",
+    "        sensorSize,\n",
+    "        bins=bins,\n",
+    "        range=bin_range,\n",
+    "        nCells=memoryCells,\n",
+    "        cores=cpuCores,\n",
+    "        blockSize=blockSize,\n",
+    "        parallel=run_parallel,\n",
+    "    )\n",
     "if corr_bools.get('relgain'):\n",
     "    # Will contain gain corrected data:\n",
-    "    histCalGainCor = xcal.HistogramCalculator(sensorSize, \n",
-    "                                              bins=bins, \n",
-    "                                              range=bin_range,\n",
-    "                                              nCells=memoryCells, \n",
-    "                                              cores=cpuCores,\n",
-    "                                              blockSize=blockSize)\n",
-    "    histCalGainCor.debug()"
+    "    histCalGainCor = xcal.HistogramCalculator(\n",
+    "        sensorSize,\n",
+    "        bins=bins,\n",
+    "        range=bin_range,\n",
+    "        nCells=memoryCells,\n",
+    "        cores=cpuCores,\n",
+    "        blockSize=blockSize,\n",
+    "        parallel=run_parallel,\n",
+    "    )"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
"source": [ + "## Applying corrections to the raw data" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Applying offset and common mode corrections to the raw data" + "def correct_train(wid, index, tid, data):\n", + " \n", + " data = np.squeeze(data[h5path][\"data.image\"].astype(np.float32))\n", + " raw_data[..., index] = data\n", + " # equating bad pixels' values to np.nan\n", + " # so that the pattern classifier ignores them:\n", + " # TODO: To clear this up. Is it on purpose to save corrected data with nans?\n", + " data[bpix != 0] = np.nan\n", + "\n", + " data -= offset # offset correction\n", + " # TODO: to clear this up. why save the badpixels map in the corrected data?\n", + " bpix_data[..., index] = bpix\n", + " off_data[..., index] = data\n", + "\n", + " # cm: common mode\n", + " if corr_bools.get('common_mode'):\n", + "\n", + " # common mode correction:\n", + " data = np.squeeze(cmCorrection.correct(\n", + " data, # common mode correction\n", + " cellTable=cell_table,\n", + " ))\n", + "\n", + " # discarding events caused by saturated pixels:\n", + " # we equate these values to np.nan so that the pattern classifier ignores them:\n", + " data[data >= saturated_threshold] = np.nan\n", + " cm_data[..., index] = data\n", + "\n", + " if corr_bools.get('relgain'):\n", + " data /= rg # relative gain correction\n", + " rg_data[..., index] = rg.astype(np.float32) # TODO: Why saving the calibration constant data?\n", + " rg_corr_data[..., index] = data\n", + "\n", + " if corr_bools.get('pattern_class'):\n", + "\n", + " # Dividing the data into left and right hemispheres:\n", + " dataLH = data[:, :pixels_x//2]\n", + " dataRH = data[:, pixels_x//2:]\n", + " # pattern classification on corrected data\n", + " dataLH, patternsLH = patternClassifierLH.classify(dataLH)\n", + " dataRH, patternsRH = patternClassifierRH.classify(dataRH)\n", + "\n", + " data[:, :pixels_x//2] = np.squeeze(dataLH)\n", + " data[:, pixels_x//2:] = np.squeeze(dataRH)\n", + " patterns = np.zeros(data.shape, patternsLH.dtype)\n", + " patterns[:, :pixels_x//2] = np.squeeze(patternsLH)\n", + " patterns[:, pixels_x//2:] = np.squeeze(patternsRH)\n", + "\n", + " data[data < split_evt_primary_threshold*noise] = 0\n", + " cls_data[..., index] = np.squeeze(data)\n", + " ptrn_data[..., index] = np.squeeze(patterns)" ] }, { @@ -673,177 +714,178 @@ "metadata": {}, "outputs": [], "source": [ - "# Data corrections and event classifications happen here. Also, the corrected data are written to datasets:\n", + "step_timer = StepTimer()\n", + "# 10 is a number chosen after testing 1 ... 
71 parallel threads\n", + "parallel_num_threads = 10\n", "\n", - "# Initialize 5 numpy array of zeros with the shape of (1024, 1024, 0)\n", - "uncor, off_cor, g_cor, cm_cor, final_cor = np.zeros((5, 1024, 1024, 0), dtype=np.float32)\n", + "# Data corrections and event classifications happen here.\n", + "# Also, the corrected data are written to datasets:\n", "\n", - "offsetMap = np.squeeze(constants[\"Offset\"])\n", - "noiseMap = np.squeeze(constants[\"Noise\"])\n", - "badPixelMap = np.squeeze(constants[\"BadPixelsDark\"])\n", + "offset = np.squeeze(constants[\"Offset\"])\n", + "noise = np.squeeze(constants[\"Noise\"])\n", + "bpix = np.squeeze(constants[\"BadPixelsDark\"])\n", + "rg = constants.get(\"RelativeGain\")\n", "\n", - "if corr_bools.get('relgain'):\n", - " relGain = constants[\"RelativeGain\"]\n", + "cell_table = np.zeros(1024, np.int32)\n", + "\n", + "plt_uncor = None\n", "\n", "for k, f in enumerate(file_list):\n", " f_dc = H5File(f)\n", - " out_fileb = f\"{out_folder}/{f.split(\"/\")[-1]}\"\n", + " out_fileb = f\"{out_folder}/{f.split('/')[-1]}\"\n", " out_file = out_fileb.replace(\"RAW\", \"CORR\")\n", "\n", - " data = None\n", - " noise = None\n", - "\n", + " context = psh.context.ThreadContext(num_workers=parallel_num_threads)\n", " try:\n", - " with h5py.File(out_file, 'w') as ofile:\n", - " # Copy RAW non-calibrated sources.\n", - " with h5py.File(f, 'r') as sfile:\n", - " h5_copy_except.h5_copy_except_paths(\n", - " sfile, ofile,\n", - " [\"INSTRUMENT/\"+h5path+\"/data/image\"])\n", - "\n", - " oshape = f_dc[h5path][\"data.image\"].shape\n", - " n_imgs = oshape[0]\n", + " step_timer.start()\n", + " dshape = f_dc[h5path, \"data.image\"].shape\n", + " n_imgs = dshape[0]\n", " # If you want to analyze only a certain number of frames\n", - " # instead of all available good frames. 
\n", + " # instead of all available good frames.\n", " if limit_images > 0:\n", " n_imgs = limit_images\n", + " print(f\"Correcting file: {f} of shape {(n_imgs,) + dshape[-2:]}.\")\n", "\n", " data_dc = f_dc.select(\n", - " h5path, \"data.image\", require_all=True).select_trains(np._s(n_imgs))\n", - "\n", - " data = np.moveaxis(data, 0, 2)\n", - "\n", - " # data set to save offset corrected images:\n", - " ddset = ofile.create_dataset(\n", - " h5path+\"/pixels\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.float32)\n", - "\n", - " # data set to create bad pixels:\n", - " ddsetm = ofile.create_dataset(\n", - " h5path+\"/mask\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.uint32, compression=\"gzip\")\n", - "\n", - " data = data.astype(np.float32) \n", - "\n", - " # Creating maps for correction usage:\n", - " offset = np.repeat(offsetMap[...,None], data.shape[2], axis=2)\n", - " noise = np.repeat(noiseMap[...,None], data.shape[2], axis=2)\n", - " bpix = np.repeat(badPixelMap[...,None], data.shape[2], axis=2)\n", - " if corr_bools.get('relgain'):\n", - " rg = np.repeat(relGain[:,:,None], data.shape[2], axis=2) # rg = relative gain\n", - "\n", - " # non-corrected images for first sequence only:\n", - " if k == 0:\n", - " uncor = np.append(uncor, data, axis=2)\n", - "\n", - " histCalRaw.fill(data) # filling histogram with raw uncorrected data\n", - "\n", - " # equating bad pixels' values to np.nan so that the pattern classifier ignores them:\n", - " data[bpix != 0] = np.nan\n", + " h5path, \"data.image\",\n", + " require_all=True).select_trains(np.s_[:n_imgs])\n", "\n", - " data -= offset # offset correction \n", + " # Allocating shared arrays for data arrays for each correction stage.\n", + " raw_data = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", "\n", - " # Offset corrected images for first sequence only:\n", - " if k == 0:\n", - " off_cor = np.append(off_cor, data, axis=2)\n", - " histCalOffsetCor.fill(data) # filling histogram with offset corrected data\n", - "\n", - " ddset[...] = np.moveaxis(data, 2, 0)\n", - " ddsetm[...] 
= np.moveaxis(bpix, 2, 0)\n", - " ofile.flush()\n", + " off_data = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", + " bpix_data = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", "\n", - " # cm: common mode\n", " if corr_bools.get('common_mode'):\n", + " cm_data = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", + " if corr_bools.get('relgain'):\n", + " rg_data = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", + " rg_corr_data = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", + " if corr_bools.get('pattern_class'):\n", + " cls_data = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", + " ptrn_data = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", + " final_cor = context.alloc(\n", + " shape=(dshape[1], dshape[2], n_imgs), dtype=np.float32)\n", + "\n", + " # data set to save split event corrected images:\n", + " # The calculation of the cluster map:\n", + " step_timer.done_step(f'Preparation.')\n", + " step_timer.start()\n", + " context.map(\n", + " correct_train,\n", + " data_dc,\n", + " )\n", + " \n", + " # Filling histogram calculators.\n", + " histCalRaw.fill(raw_data) # filling histogram with raw uncorrected data\n", + " histCalOffsetCor.fill(off_data) # filling histogram with offset corrected data\n", + " if corr_bools.get('common_mode'):\n", + " histCalCommonModeCor.fill(cm_data) # filling histogram with common mode corrected data\n", + " if corr_bools.get('relgain'):\n", + " histCalGainCor.fill(rg_corr_data) # filling histogram with gain corrected data\n", + " if corr_bools.get('pattern_class'): \n", + " histCalPcorr.fill(cls_data) # filling histogram with split events corrected data\n", + " cls_data[ptrn_data != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles\n", + " histCalPcorrS.fill(cls_data) # filling histogram with singles events data\n", "\n", - " # data set to save common mode corrected images:\n", - " ddsetcm = ofile.create_dataset(h5path+\"/pixels_cm\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.float32)\n", - "\n", - " # common mode correction:\n", - " data = cmCorrection.correct(data.astype(np.float32), # common mode correction\n", - " cellTable=np.zeros(data.shape[2], np.int32)) \n", - "\n", - " # discarding events caused by saturated pixels:\n", - " # we equate these values to np.nan so that the pattern classifier ignores them:\n", - " data[data >= saturated_threshold] = np.nan \n", - " histCalCommonModeCor.fill(data) # filling histogram with common mode corrected data\n", - " # common mode corrected images for first sequence only:\n", - " if k == 0:\n", - " cm_cor = np.append(cm_cor, data, axis=2)\n", - "\n", - " ddsetcm[...] = np.moveaxis(data, 2, 0)\n", + " step_timer.done_step(f'Correction.')\n", "\n", - " if corr_bools.get('relgain'):\n", - " # data set to save gain corrected images:\n", - " ddsetg = ofile.create_dataset(h5path+\"/gain\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.float32, compression=\"gzip\")\n", - "\n", - " data /= rg # relative gain correction \n", - " histCalGainCor.fill(data) # filling histogram with gain corrected data\n", - " # gain corrected images for first sequence only:\n", - " if k == 0:\n", - " g_cor = np.append(g_cor, data, axis=2)\n", - " ddsetg[...] 
= np.moveaxis(rg, 2, 0).astype(np.float32)\n", + " step_timer.start()\n", "\n", - " if corr_bools.get('pattern_class'):\n", - " # data set to save split event corrected images:\n", - " # c: classifications, p: even patterns\n", - " ddsetc = ofile.create_dataset(h5path+\"/pixels_classified\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.float32, compression=\"gzip\")\n", - " # data set to save different valid patterns:\n", - " ddsetp = ofile.create_dataset(h5path+\"/patterns\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.int32, compression=\"gzip\")\n", - "\n", - " # The calculation of the cluster map:\n", - " patternClassifierLH._noisemap = noise[:, :pixels_x//2, :]\n", - " patternClassifierRH._noisemap = noise[:, pixels_x//2:, :]\n", - "\n", - " # Dividing the data into left and right hemispheres:\n", - " dataLH = data[:, :pixels_x//2, :]\n", - " dataRH = data[:, pixels_x//2:, :]\n", - "\n", - " # pattern classification on corrected data\n", - " dataLH, patternsLH = patternClassifierLH.classify(dataLH) \n", - " dataRH, patternsRH = patternClassifierRH.classify(dataRH)\n", - "\n", - " data[:, :pixels_x//2, :] = dataLH\n", - " data[:, pixels_x//2:, :] = dataRH\n", - "\n", - " patterns = np.zeros(data.shape, patternsLH.dtype)\n", - " patterns[:, :pixels_x//2, :] = patternsLH\n", - " patterns[:, pixels_x//2:, :] = patternsRH\n", - "\n", - " data[data < split_evt_primary_threshold*noise] = 0\n", - " ddsetc[...] = np.moveaxis(data, 2, 0)\n", - " ddsetp[...] = np.moveaxis(patterns, 2, 0)\n", - "\n", - " histCalPcorr.fill(data) # filling histogram with split events corrected data\n", - " data[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles\n", - " histCalPcorrS.fill(data) # filling histogram with singles events data\n", - "\n", - " # split event corrected images for first sequence only (also these events are only \n", - " # singles events):\n", - " if k == 0:\n", - " final_cor = np.append(final_cor, data, axis=2)\n", - "\n", - "except Exception as e:\n", - " print(f\"Couldn't calibrate data in {f}: {e}\\n\")\n", + " if k == 0:\n", + " plt_uncor = raw_data\n", + " # Offset corrected images for first sequence only:\n", + " off_cor = np.copy(off_data)\n", + " final_cor = off_cor\n", + " # cm: common mode\n", + " if corr_bools.get('common_mode'):\n", + " # common mode corrected images for first sequence only:\n", + " cm_cor = np.copy(cm_data)\n", + " final_cor = cm_cor\n", + " if corr_bools.get('relgain'):\n", + " # gain corrected images for first sequence only:\n", + " g_cor = np.copy(rg_corr_data)\n", + " final_cor = g_cor\n", + " if corr_bools.get('pattern_class'):\n", + " # split event corrected images for first sequence only\n", + " # (also these events are only singles events):\n", + " final_cor = cls_data\n", + " with h5py.File(out_file, 'w') as ofile:\n", + " # Copy RAW non-calibrated sources.\n", + " with h5py.File(f, 'r') as sfile:\n", + " h5_copy_except.h5_copy_except_paths(\n", + " sfile, ofile,\n", + " [\"INSTRUMENT/\"+h5path+\"/data/image\"],\n", + " )\n", + " # TODO: to clear this up: why save corrected data in data/pixels rather than data/image.\n", + " # corr_run : /gpfs/exfel/d/proc/SQS/202131/p900236/r0044/CORR-R0044-PNCCD01-S00000.h5 still has data/image. 
+    "            # data set to save offset corrected images:\n",
+    "            ddset = ofile.create_dataset(\n",
+    "                \"INSTRUMENT/\"+h5path+\"/data/pixels\",\n",
+    "                data=np.moveaxis(off_data, 2, 0),\n",
+    "                chunks=(chunk_size_idim, dshape[1], dshape[2]),\n",
+    "                dtype=np.float32,\n",
+    "            )\n",
+    "            # data set to create bad pixels:\n",
+    "            ddsetm = ofile.create_dataset(\n",
+    "                \"INSTRUMENT/\"+h5path+\"/data/mask\",\n",
+    "                data=np.moveaxis(bpix_data, 2, 0),\n",
+    "                chunks=(chunk_size_idim, dshape[1], dshape[2]),\n",
+    "                dtype=np.uint32,\n",
+    "                compression=\"gzip\",\n",
+    "            )\n",
+    "            # cm: common mode\n",
+    "            if corr_bools.get('common_mode'):\n",
+    "                # data set to save common mode corrected images:\n",
+    "                ddsetcm = ofile.create_dataset(\n",
+    "                    \"INSTRUMENT/\"+h5path+\"/data/pixels_cm\",\n",
+    "                    data=np.moveaxis(cm_data, 2, 0),\n",
+    "                    chunks=(chunk_size_idim, dshape[1], dshape[2]),\n",
+    "                    dtype=np.float32,\n",
+    "                )\n",
+    "            if corr_bools.get('relgain'):\n",
+    "                # data set to save gain corrected images:\n",
+    "                ddsetg = ofile.create_dataset(\n",
+    "                    \"INSTRUMENT/\"+h5path+\"/data/gain\",\n",
+    "                    data=np.moveaxis(rg_data, 2, 0),\n",
+    "                    chunks=(chunk_size_idim, dshape[1], dshape[2]),\n",
+    "                    dtype=np.float32,\n",
+    "                    compression=\"gzip\",\n",
+    "                )\n",
+    "\n",
+    "            if corr_bools.get('pattern_class'):\n",
+    "                # c: classifications, p: even patterns\n",
+    "                ddsetc = ofile.create_dataset(\n",
+    "                    \"INSTRUMENT/\"+h5path+\"/data/pixels_classified\",\n",
+    "                    data=np.moveaxis(cls_data, 2, 0),\n",
+    "                    chunks=(chunk_size_idim, dshape[1], dshape[2]),\n",
+    "                    dtype=np.float32,\n",
+    "                    compression=\"gzip\",\n",
+    "                )\n",
+    "                # data set to save different valid patterns:\n",
+    "                ddsetp = ofile.create_dataset(\n",
+    "                    \"INSTRUMENT/\"+h5path+\"/data/patterns\",\n",
+    "                    data=np.moveaxis(ptrn_data, 2, 0),\n",
+    "                    chunks=(chunk_size_idim, dshape[1], dshape[2]),\n",
+    "                    dtype=np.int32,\n",
+    "                    compression=\"gzip\",\n",
+    "                )\n",
+    "        step_timer.done_step('Storing data.')\n",
+    "    except Exception as e:\n",
+    "        print(f\"Couldn't calibrate data in {f}: {e}\\n\")\n",
+    "        traceback.print_exc(limit=1)\n",
     "\n",
     "print(\"In addition to offset correction, the following corrections were performed:\")\n",
-    "print[k for k, v in corr_bools.items() if v]\n",
-    "\n",
     "for k, v in corr_bools.items():\n",
     "    if v:\n",
     "        print(k)"
    ]
   },
@@ -862,7 +904,7 @@
    "# if you use histCalRaw.get(cumulatative = True) and so on, the cumulatative = True turns the counts array such as \n",
    "# RawHistVals and so on into a 1D array instead of keeping the original shape:\n",
    "\n",
-   "RawHistVals, _, RawHistMids, _ = histCalRaw.get() \n",
+   "RawHistVals, _, RawHistMids, _ = histCalRaw.get()\n",
    "off_cor_HistVals, _, off_cor_HistMids, _ = histCalOffsetCor.get()\n",
    "if corr_bools.get('common_mode'):\n",
    "    cm_cor_HistVals, _, cm_HistMids, _ = histCalCommonModeCor.get()\n",
@@ -880,7 +922,7 @@
    "outputs": [],
    "source": [
     "# Saving intermediate data to disk:\n",
-    "\n",
+    "step_timer.start()\n",
     "np.savez(os.path.join(out_folder, 'Raw_Events.npz'), RawHistMids, RawHistVals)\n",
     "np.savez(os.path.join(out_folder, 'Offset_Corrected_Events.npz'), off_cor_HistMids, off_cor_HistVals)\n",
     "if corr_bools.get('common_mode'):\n",
@@ -890,7 +932,7 @@
     "if corr_bools.get('pattern_class'):\n",
     "    np.savez(os.path.join(out_folder, 'Split_Events_Corrected_Events.npz'), split_HistMids, split_HistVals)\n",
     "    np.savez(os.path.join(out_folder, 'Singles_Events.npz'), singles_HistMids, singles_HistVals)\n",
-    "\n",
+    "step_timer.done_step('Saving intermediate data to disk.')\n",
     "print(\"Various spectra are saved to disk in the form of histograms. Please check {}\".format(out_folder))"
    ]
   },
@@ -901,6 +943,7 @@
    "outputs": [],
    "source": [
     "display(Markdown('### Raw vs. Corrected Spectra'))\n",
+    "step_timer.start()\n",
     "\n",
     "figure = [{'x': RawHistMids,\n",
     "           'y': RawHistVals,\n",
@@ -937,8 +980,8 @@
     "                   'errorstyle': 'bars',\n",
     "                   'errorcoarsing': 2,\n",
     "                   'label': 'Gain Corrected'})\n",
-    "    \n",
-    "if corr_bools.get('pattern_class'): \n",
+    "\n",
+    "if corr_bools.get('pattern_class'):\n",
     "    figure.extend([{'x': split_HistMids,\n",
     "                    'y': split_HistVals,\n",
     "                    'y_err': np.sqrt(split_HistVals[:]),\n",
@@ -957,7 +1000,8 @@
     "                   }])\n",
     "fig = xana.simplePlot(figure, aspect=1, x_label='ADU', y_label='Number of Occurrences', figsize='2col',\n",
     "                      y_log=True, x_range=bin_range, title = '1 ADU per bin is used.',\n",
-    "                      legend='top-right-frame-1col')"
+    "                      legend='top-right-frame-1col')\n",
+    "step_timer.done_step('Plotting (Raw vs. Corrected Spectra.)')"
    ]
   },
@@ -969,7 +1013,7 @@
    "# This function plots pattern statistics:\n",
    "\n",
    "def classification_plot(patternStats, hemisphere):\n",
-    "    \n",
+    "\n",
     "    print(\"****************** {} HEMISPHERE ******************\\n\"\n",
     "          .format(hemisphere))\n",
     "    fig = plt.figure(figsize=(15, 15))\n",
@@ -994,7 +1038,6 @@
     "\n",
     "    smaps = [\"singlemap\", \"firstsinglemap\", \"clustermap\"]\n",
     "    for i, m in enumerate(smaps):\n",
-    "\n",
     "        ax = fig.add_subplot(4, 4, 2+i)\n",
     "        pmap = ax.imshow(patternStats[m], interpolation=\"nearest\", vmax=2*np.nanmedian(patternStats[m]))\n",
     "        ax.set_title(m)\n",
@@ -1003,7 +1046,6 @@
     "    mmaps = [\"doublemap\", \"triplemap\", \"quadmap\"]\n",
     "    k = 0\n",
     "    for i, m in enumerate(mmaps):\n",
-    "\n",
     "        for j in range(4):\n",
     "            ax = fig.add_subplot(4, 4, 2+len(smaps)+k)\n",
     "            pmap = ax.imshow(patternStats[m][j], interpolation=\"nearest\", vmax=2*np.median(patternStats[m][j]))\n",
@@ -1043,7 +1085,7 @@
    "outputs": [],
    "source": [
     "display(Markdown('### Classification Results - Tabulated Statistics'))\n",
-    "\n",
+    "step_timer.start()\n",
     "if corr_bools.get('pattern_class'):\n",
     "    t0 = PrettyTable()\n",
     "    t0.title = \"Total Number of Counts after All Corrections\"\n",
@@ -1063,7 +1105,8 @@
     "                patternStatsRH['triples'][2], patternStatsLH['quads'][2], patternStatsRH['quads'][2]])\n",
     "    t1.add_row([3, patternStatsLH['doubles'][3], patternStatsRH['doubles'][3], patternStatsLH['triples'][3], \n",
     "                patternStatsRH['triples'][3], patternStatsLH['quads'][3], patternStatsRH['quads'][3]])\n",
-    "    print(t1)"
+    "    print(t1)\n",
+    "step_timer.done_step('Classification Results - Tabulated Statistics')"
    ]
   },
@@ -1118,6 +1161,7 @@
    "outputs": [],
    "source": [
     "display(Markdown('### Classification Results - Pie Charts'))\n",
+    "step_timer.start()\n",
     "\n",
     "if corr_bools.get('pattern_class'):\n",
     "    fig = plt.figure(figsize=(12, 7))\n",
@@ -1131,7 +1175,8 @@
     "    pie = ax.pie(reloccurRH, labels=labels, autopct='%1.1f%%', shadow=True)\n",
     "    ax.set_title(\"Pattern Occurrence in RH\")\n",
     "    # Set aspect ratio to be equal so that pie is drawn as a circle.\n",
-    "    a = ax.axis('equal')"
+    "    a = ax.axis('equal')\n",
+    "step_timer.done_step('Classification Results - Pie Charts')"
    ]
   },
@@ -1148,12 +1193,11 @@
    "ExecuteTime": {
     "end_time": "2018-12-06T16:10:56.212586Z",
     "start_time": "2018-12-06T16:10:56.204731Z"
-    },
-    "scrolled": false
+    }
   },
   "outputs": [],
   "source": [
-    "uncor_mean_im = np.nanmean(uncor, axis=2)\n",
+    "uncor_mean_im = np.nanmean(plt_uncor, axis=2)\n",
"offset_mean_im = np.nanmean(off_cor, axis=2)\n", "\n", "if corr_bools.get('common_mode'):\n", @@ -1206,12 +1250,11 @@ "ExecuteTime": { "end_time": "2018-12-06T16:11:08.317130Z", "start_time": "2018-12-06T16:11:05.788655Z" - }, - "scrolled": false + } }, "outputs": [], "source": [ - "fig = xana.heatmapPlot(uncor[:,:,0], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1, \n", + "fig = xana.heatmapPlot(plt_uncor[:,:,0], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1, \n", " x_range=(0, pixels_y), y_range=(0, pixels_x), \n", " panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)', \n", " title = 'Uncorrected Image (First Frame of the First Sequence)')\n", @@ -1282,39 +1325,37 @@ " doubles = []\n", " triples = []\n", " quads = []\n", - "\n", - " with h5py.File(\"{}/CORR-R{:04d}-PNCCD01-S{:05d}.h5\".format(out_folder, run, sequences[seq_num]), 'r', \n", - " driver='core') as infile:\n", - "\n", - " data = infile[h5path+\"/pixels_classified\"][()].astype(np.float32) # classifications\n", - " patterns = infile[h5path+\"/patterns\"][()].astype(np.float32) # event patterns\n", - "\n", - " # events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),\n", - " # 300 - 303 (triples), and 400 - 403 (quadruples). Note that for the last three types of patterns, \n", - " # there are left, right, up, and down indices.\n", - "\n", - " # Separating the events:\n", - " # Singles and First Singles:\n", - " for s in range(100, 102):\n", - " single = copy.copy(data[...])\n", - " single[patterns != s] = np.nan\n", - " singles.append(single)\n", - "\n", - "\n", - " for d in range(200, 204):\n", - " double = copy.copy(data[...])\n", - " double[patterns != d] = np.nan\n", - " doubles.append(double)\n", - "\n", - " for t in range(300, 304):\n", - " triple = copy.copy(data[...])\n", - " triple[patterns != t] = np.nan\n", - " triples.append(triple) \n", - "\n", - " for q in range(400, 404):\n", - " quad = copy.copy(data[...])\n", - " quad[patterns != q] = np.nan\n", - " quads.append(quad)" + " with H5File(\n", + " f\"{out_folder}/{path_template.format(run, karabo_da, sequences[0]).replace('RAW', 'CORR')}\"\n", + " ) as dc:\n", + " data = dc[h5path, \"data.pixels_classified\"].ndarray()\n", + " patterns = dc[h5path, \"data.patterns\"].ndarray()\n", + " # events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),\n", + " # 300 - 303 (triples), and 400 - 403 (quadruples). 
Note that for the last three types of patterns, \n", + " # there are left, right, up, and down indices.\n", + "\n", + " # Separating the events:\n", + " # Singles and First Singles:\n", + " for s in range(100, 102):\n", + " single = copy.copy(data[...])\n", + " single[patterns != s] = np.nan\n", + " singles.append(single)\n", + "\n", + "\n", + " for d in range(200, 204):\n", + " double = copy.copy(data[...])\n", + " double[patterns != d] = np.nan\n", + " doubles.append(double)\n", + "\n", + " for t in range(300, 304):\n", + " triple = copy.copy(data[...])\n", + " triple[patterns != t] = np.nan\n", + " triples.append(triple) \n", + "\n", + " for q in range(400, 404):\n", + " quad = copy.copy(data[...])\n", + " quad[patterns != q] = np.nan\n", + " quads.append(quad)" ] }, { diff --git a/src/cal_tools/pnccdlib.py b/src/cal_tools/pnccdlib.py index 785d289c359ffe6d3e62aa15bb6e41de56d4145d..fc5d6229607f43dbabf9ef341b65e4eba6deb8b3 100644 --- a/src/cal_tools/pnccdlib.py +++ b/src/cal_tools/pnccdlib.py @@ -4,7 +4,7 @@ import traceback from typing import Tuple import h5py - +from extra_data import H5File, SourceNameError VALID_GAINS = { "a" : 1.0, "b" : 4.0, @@ -16,41 +16,34 @@ VALID_GAINS = { } -def extract_slow_data(karabo_id: str, karabo_da_control: str, - ctrl_fname: str, ctrl_path: str, - mdl_ctrl_path: str, - bias_voltage: float, gain: float, - fix_temperature_top :float, - fix_temperature_bot :float, - ) -> Tuple[float, float, float, float]: +def extract_slow_data( + ctrl_dc: "extra_data.DataCollection", + ctrl_path: str, + mdl_path: str, + bias_voltage: float, gain: float, + fix_temperature_top :float, + fix_temperature_bot :float, + ) -> Tuple[float, float, float, float]: """ Extract slow data from given control paths. """ try: - with h5py.File(ctrl_fname, "r") as f: - if bias_voltage == 0.: - bias_voltage = abs(f[os.path.join(mdl_ctrl_path, - "DAQ_MPOD/u0voltage/value")][0]) # noqa - if gain == 0.1: - gain = f[os.path.join(mdl_ctrl_path, - "DAQ_GAIN/pNCCDGain/value")][0] - if fix_temperature_top == 0.: - fix_temperature_top = f[os.path.join(ctrl_path, - "inputA/krdg/value")][0] - if fix_temperature_bot == 0.: - fix_temperature_bot = f[os.path.join(ctrl_path, - "inputB/krdg/value")][0] - except IOError: - print("Error during reading slow data," - " please check the given h5 path for the control parameters") + if bias_voltage == 0.: + bias_voltage = abs(ctrl_dc[ + mdl_path.format("DAQ_MPOD"), "u0voltage.value"][0].ndarray()[0]) # noqa + if gain == 0.1: + gain = abs(ctrl_dc[ + mdl_path.format("DAQ_GAIN"), "pNCCDGain.value"][0].ndarray()[0]) # noqa + if fix_temperature_top == 0.: + fix_temperature_top = ctrl_dc[ + ctrl_path, "inputA.krdg.value"][0].ndarray()[0] + if fix_temperature_bot == 0.: + fix_temperature_bot = ctrl_dc[ + ctrl_path, "inputB.krdg.value"][0].ndarray()[0] # noqa + except SourceNameError: + traceback.print_exc(limit=1) + print("Available sources are {ctrl_dc.all_sources}") + except Exception as e: + print("ERROR while reading slow data: {e}.") traceback.print_exc(limit=1) - print("bias voltage control h5path:", - os.path.join(mdl_ctrl_path, "DAQ_MPOD/u0voltage/value")) - print("gain control h5path:", - os.path.join(mdl_ctrl_path, "DAQ_GAIN/pNCCDGain/value")) - print("fix_temperature_top control h5path:", - os.path.join(ctrl_path, "inputA/krdg/value")) - print("fix_temperature_bot control h5path:", - os.path.join(ctrl_path, "inputB/krdg/value")) - return bias_voltage, gain, fix_temperature_top, fix_temperature_bot
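Editor's note — two sketches of the patterns this patch introduces (illustrative only, not part of the patch; run paths and array shapes below are made up):

1. The rewritten correction loop uses pasha's ThreadContext: shared output arrays are allocated up front with context.alloc(), and a per-train kernel writes into them from several threads, indexed by train. A minimal, self-contained version of that pattern on synthetic frames:

    import numpy as np
    import pasha as psh

    # Synthetic stand-in for one sequence file of detector frames
    # (hypothetical 64x64 shape; the notebook works on 1024x1024 pnCCD images).
    n_trains = 8
    frames = np.random.default_rng(0).normal(
        1000., 5., (n_trains, 64, 64)).astype(np.float32)
    offset = frames.mean(axis=0)

    context = psh.context.ThreadContext(num_workers=4)
    # Shared output array; each worker writes only to its own train index,
    # so no locking is needed.
    corrected = context.alloc(shape=(64, 64, n_trains), dtype=np.float32)

    def correct_frame(wid, index, frame):
        # For an array input, pasha calls the kernel as (worker_id, index, item).
        corrected[..., index] = frame - offset

    context.map(correct_frame, frames)

2. extract_slow_data now reads control values through extra_data instead of raw h5py. A usage sketch with a placeholder run directory:

    from extra_data import RunDirectory

    run_dc = RunDirectory("/gpfs/exfel/exp/SQS/.../raw/r0044")  # placeholder path
    ctrl_path = "SQS_NQS_PNCCD1MP/CTRL/TCTRL"
    # First reading of the bottom-sensor temperature, as extract_slow_data does:
    t_bot = run_dc[ctrl_path, "inputB.krdg.value"].ndarray()[0]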