From 2b99c4d18b68c6f913ef96f984317c526e7346f0 Mon Sep 17 00:00:00 2001 From: ahmedk <karim.ahmed@xfel.eu> Date: Wed, 17 Nov 2021 17:07:08 +0100 Subject: [PATCH] Update pnccd correct based on MR comments --- .../pnCCD/Characterize_pnCCD_Dark_NBC.ipynb | 30 ++- notebooks/pnCCD/Correct_pnCCD_NBC.ipynb | 175 ++++++++++-------- 2 files changed, 114 insertions(+), 91 deletions(-) diff --git a/notebooks/pnCCD/Characterize_pnCCD_Dark_NBC.ipynb b/notebooks/pnCCD/Characterize_pnCCD_Dark_NBC.ipynb index a1e303b74..cb1e13f64 100644 --- a/notebooks/pnCCD/Characterize_pnCCD_Dark_NBC.ipynb +++ b/notebooks/pnCCD/Characterize_pnCCD_Dark_NBC.ipynb @@ -49,7 +49,7 @@ "fix_temperature_top = 0. # fix temperature of top pnCCD sensor in K. Set to 0, to use the value from slow data\n", "fix_temperature_bot = 0. # fix temperature of bottom pnCCD sensor in K. Set to 0, to use the value from slow data\n", "temp_limits = 5 # temperature limits in which calibration parameters are considered equal\n", - "gain = -1 # the detector's gain setting. Set to -1 to use the value from the slow data\n", + "gain = -1 # the detector's gain setting. Set to -1 to use the value from the slow data.\n", "bias_voltage = 0. # the detector's bias voltage. set to 0. to use the value from slow data.\n", "integration_time = 70 # detector's integration time.\n", "\n", @@ -426,24 +426,23 @@ "def correct_image(wid, idx, d):\n", " d -= offsetMap\n", " offset_corr_data[idx, ...] = d\n", - " histCalCorrected.fill(d)\n", "\n", " d = np.squeeze(cmCorrection.correct(\n", " d[..., np.newaxis],\n", " cellTable=np.zeros((1,), np.int32)\n", " ))\n", - " histCalCMCorrected.fill(d)\n", " corr_data[idx, ...] = d\n", "\n", "context = psh.context.ThreadContext(num_workers=10)\n", "\n", - "corr_data = context.alloc(\n", - " shape=(n_trains, sensorSize[0], sensorSize[1]), dtype=np.float32)\n", - "offset_corr_data = context.alloc(\n", - " shape=(n_trains, sensorSize[0], sensorSize[1]), dtype=np.float32)\n", + "data_shape = (n_trains, sensorSize[0], sensorSize[1])\n", "\n", - "context.map(correct_image, data.copy())\n", + "corr_data = context.alloc(shape=data_shape, dtype=np.float32)\n", + "offset_corr_data = context.alloc(shape=data_shape, dtype=np.float32)\n", "\n", + "context.map(correct_image, data.copy())\n", + "histCalCorrected.fill(offset_corr_data)\n", + "histCalCMCorrected.fill(corr_data)\n", "step_timer.done_step(f'Offset and common mode corrections are applied.')" ] }, @@ -675,22 +674,19 @@ " d_off[d_off_cm > event_threshold] = np.nan\n", " d_off_cm[d_off_cm > event_threshold] = np.nan \n", "\n", - " histCalCorrected.fill(d_off[..., np.newaxis])\n", - "\n", " offset_corr_data[idx, ...] = d_off # Will be used for plotting\n", - " \n", " corr_data[idx, ...] 
= np.squeeze(d_off_cm)\n",
- " histCalCorrected.fill(d_off)\n",
- " histCalCMCorrected.fill(d_off_cm)\n",
+ "\n",
 "context = psh.context.ThreadContext(num_workers=10)\n",
 "\n",
- "corr_data = context.alloc(\n",
- " shape=(n_trains, sensorSize[0], sensorSize[1]), dtype=np.float32)\n",
- "offset_corr_data = context.alloc(\n",
- " shape=(n_trains, sensorSize[0], sensorSize[1]), dtype=np.float32)\n",
+ "corr_data = context.alloc(shape=data_shape, dtype=np.float32)\n",
+ "offset_corr_data = context.alloc(shape=data_shape, dtype=np.float32)\n",
 "\n",
 "context.map(correct_image, data.copy())\n",
 "\n",
+ "histCalCorrected.fill(offset_corr_data)\n",
+ "histCalCMCorrected.fill(corr_data)\n",
+ "\n",
 "print(f\"A total of {n_trains} images are processed.\")\n",
 "step_timer.done_step(\"Final iteration is performed.\")"
 ]
}
diff --git a/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb b/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
index 181eb9825..54f5d78c1 100644
--- a/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
+++ b/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
@@ -18,34 +18,29 @@
 "outputs": [],
 "source": [
 "in_folder = \"/gpfs/exfel/exp/SQS/202031/p900166/raw\" # input folder\n",
- "out_folder = '' # output folder\n",
+ "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/remove\" # output folder\n",
 "run = 347 # which run to read data from\n",
- "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n",
+ "sequences = [0] # sequences to correct, set to -1 for all, range allowed\n",
 "sequences_per_node = 1 # number of sequences running on the same slurm node.\n",
 "\n",
 "karabo_da = 'PNCCD01' # data aggregators\n",
 "karabo_id = \"SQS_NQS_PNCCD1MP\" # karabo prefix of PNCCD devices\n",
 "receiver_id = \"PNCCD_FMT-0\" # inset for receiver devices\n",
 "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
- "instrument_source_template = '{}/CAL/{}:output' # path to data in the HDF5 file \n",
- "\n",
- "overwrite = True # keep this as True to not overwrite the output \n",
+ "instrument_source_template = '{}/CAL/{}:output' # template for data source name, will be filled with karabo_id and receiver_id.\n",
 "\n",
 "# Parameters affecting data correction.\n",
+ "commonModeAxis = 0 # axis along which common mode will be calculated, 0 = row, and 1 = column\n",
 "commonModeBlockSize = [512, 512] # size of the detector in pixels for common mode calculations\n",
- "commonModeAxis = 0 # axis along which common mode will be calculated, 0 = row, and 1 = column \n",
 "split_evt_primary_threshold = 4. # primary threshold for split event classification in terms of n sigma noise\n",
 "split_evt_secondary_threshold = 3. # secondary threshold for split event classification in terms of n sigma noise\n",
 "saturated_threshold = 32000. # full well capacity in ADU\n",
- "chunk_size_idim = 1 # H5 chunking size of output data\n",
- "# ONLY FOR TESTING\n",
- "limit_images = 0 # this parameter is used for limiting number of images to correct from a sequence file. ONLY FOR TESTING.\n",
 "\n",
 "# Conditions for retrieving calibration constants\n",
 "set_parameters_manually = False # Boolean for setting parameter values manually instead of reading control values.\n",
 "fix_temperature_top = 0. # fix temperature for top sensor in K, set to 0. to use value from slow data.\n",
 "fix_temperature_bot = 0. # fix temperature for bottom sensor in K, set to 0. 
to use value from slow data.\n", - "gain = -1 # the detector's gain setting, It is later read from file and this value is overwritten\n", + "gain = -1 # the detector's gain setting. Set to -1 to use the value from the slow data.\n", "bias_voltage = 0. # the detector's bias voltage. set to 0. to use value from slow data.\n", "integration_time = 70 # detector's integration time\n", "photon_energy = 1.6 # Al fluorescence in keV\n", @@ -61,6 +56,12 @@ "relgain = True # Apply relative gain correction\n", "pattern_classification = True # classify split events\n", "\n", + "# parameters affecting stored output data.\n", + "chunk_size_idim = 1 # H5 chunking size of output data\n", + "overwrite = True # keep this as True to not overwrite the output \n", + "# ONLY FOR TESTING\n", + "limit_images = 0 # this parameter is used for limiting number of images to correct from a sequence file. ONLY FOR TESTING.\n", + "\n", "# TODO: REMOVE\n", "db_module = \"\"\n", "\n", @@ -134,8 +135,14 @@ "source": [ "# Calibration Database Settings, and Some Initial Run Parameters & Paths:\n", "display(Markdown('### Initial Settings and Paths'))\n", - "pixels_x = 1024 # rows of pnCCD in pixels \n", - "pixels_y = 1024 # columns of pnCCD in pixels\n", + "\n", + "# Sensor size and block size definitions (important for common mode and other corrections):\n", + "pixels_x = 1024 # rows of pnCCD in pixels \n", + "pixels_y = 1024 # columns of pnCCD in pixels\n", + "sensorSize = [pixels_x, pixels_y]\n", + "# For xcal.HistogramCalculators.\n", + "blockSize = [pixels_x//2, pixels_y//2] # sensor area will be analysed according to blockSize.\n", + "\n", "print(f\"pnCCD size is: {pixels_x}x{pixels_y} pixels.\")\n", "print(f'Calibration database interface selected: {cal_db_interface}')\n", "\n", @@ -213,7 +220,7 @@ "outputs": [], "source": [ "seq_files = []\n", - "for f in run_dc.select(f\"*{karabo_id}*\").files:\n", + "for f in run_dc.select(instrument_src).files:\n", " fpath = Path(f.filename)\n", " if fpath.match(f\"*{karabo_da}*.h5\"):\n", " seq_files.append(fpath)\n", @@ -305,18 +312,6 @@ "b_range = Event_Bin_Dict[\"b_range\"]" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Sensor size and block size definitions (important for common mode and other corrections):\n", - "sensorSize = [pixels_x, pixels_y]\n", - "blockSize = [sensorSize[0]//2, sensorSize[1]//2] # sensor area will be analysed according to blockSize\n", - "xcal.defaultBlockSize = blockSize # for xcal.HistogramCalculators " - ] - }, { "cell_type": "code", "execution_count": null, @@ -478,7 +473,7 @@ "if corr_bools.get('common_mode'):\n", " # Common Mode Correction Calculator:\n", " cmCorrection = xcal.CommonModeCorrection([pixels_x, pixels_y],\n", - " blockSize,\n", + " commonModeBlockSize,\n", " commonModeAxis,\n", " parallel=False, dType=np.float32, stride=1,\n", " noiseMap=constants[\"Noise\"].astype(np.float32), minFrac=0.25)\n", @@ -515,8 +510,8 @@ " patternClassifierLH._noisemap = constants[\"Noise\"][:, :pixels_x//2]\n", " patternClassifierRH._noisemap = constants[\"Noise\"][:, pixels_x//2:]\n", " # Setting bad pixels:\n", - " patternClassifierLH.setBadPixelMask(constants[\"BadPixelsDark\"][:, :pixels_y//2] != 0)\n", - " patternClassifierRH.setBadPixelMask(constants[\"BadPixelsDark\"][:, pixels_y//2:] != 0)" + " patternClassifierLH.setBadPixelMask(constants[\"BadPixelsDark\"][:, :pixels_x//2] != 0)\n", + " patternClassifierRH.setBadPixelMask(constants[\"BadPixelsDark\"][:, pixels_x//2:] != 0)" ] }, { 
@@ -593,17 +588,15 @@ " Equating bad pixels' values to np.nan,\n", " so that the pattern classifier ignores them:\n", " \"\"\"\n", - " histCalRaw.fill(d) # filling histogram with raw uncorrected data\n", + " d = d.copy()\n", "\n", " # TODO: To clear this up. Is it on purpose to save corrected data with nans?\n", " d[bpix != 0] = np.nan\n", - " d -= offset # offset correction\n", + " d -= offset # offset correction\n", "\n", " # TODO: to clear this up. why save the badpixels map in the corrected data?\n", " bpix_data[index, ...] = bpix\n", " data[index, ...] = d\n", - " histCalOffsetCor.fill(d) # filling histogram with offset corrected data\n", - "\n", "\n", "def common_mode(wid, index, d):\n", " \"\"\"common-mode correction.\n", @@ -613,17 +606,15 @@ " # we equate these values to np.nan so that the pattern classifier ignores them:\n", " d[d >= saturated_threshold] = np.nan\n", " data[index, ...] = d\n", - " histCalCommonModeCor.fill(d) # filling histogram with common mode corrected data\n", "\n", "\n", "def gain_correction(wid, index, d):\n", " \"\"\"relative gain correction.\"\"\"\n", - " d /= rg \n", + " d /= relativegain \n", " data[index, ...] = d\n", - " histCalGainCor.fill(d) # filling histogram with gain corrected data\n", "\n", "\n", - "def pattern_classification(wid, index, d):\n", + "def pattern_classification_correction(wid, index, d):\n", " \"\"\"pattern classification correction.\n", " data set to save split event corrected images\n", " \n", @@ -643,10 +634,9 @@ " patterns[:, pixels_x//2:] = np.squeeze(patternsRH)\n", " d[d < split_evt_primary_threshold*noise] = 0\n", " data[index, ...] = d\n", - " histCalPcorr.fill(d) # filling histogram with split events corrected data\n", " ptrn_data[index, ...] = patterns\n", - " d[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles\n", - " histCalPcorrS.fill(d) # filling histogram with singles events data" + " d[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles\n", + " filtered_data[index, ...] = d" ] }, { @@ -655,7 +645,7 @@ "metadata": {}, "outputs": [], "source": [ - "# 10 is a number chosen after testing 1 ... 71 parallel threads\n", + "# 10 is a number chosen after testing 1 ... 
71 parallel threads on a node with 72 CPUs.\n",
 "parallel_num_threads = 10\n",
 "context = psh.context.ThreadContext(num_workers=parallel_num_threads)\n",
 "\n",
 "data_path = \"INSTRUMENT/\"+instrument_src+\"/data/\"\n",
 "offset = np.squeeze(constants[\"Offset\"])\n",
 "noise = np.squeeze(constants[\"Noise\"])\n",
 "bpix = np.squeeze(constants[\"BadPixelsDark\"])\n",
- "rg = constants.get(\"RelativeGain\")\n",
- "\n",
- "corr_comp_fields = {}\n",
- "kw = {\"compression\": \"gzip\"}\n",
- "comp_fields = [\"gain\", \"patterns\", \"pixels_classified\"]"
+ "relativegain = constants.get(\"RelativeGain\")"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
- "def write_dataset(ofile, data, field, kw={}):\n",
- " ofile.create_dataset(\n",
- " f\"{data_path}/{field}\",\n",
- " data=data,\n",
- " chunks=(chunk_size_idim, pixels_x, pixels_y),\n",
- " dtype=data.dtype,\n",
- " **kw,\n",
- " )"
+ "def write_datasets(corr_arrays, ofile):\n",
+ " \"\"\"\n",
+ " Create all datasets first, then fill them with data.\n",
+ " This keeps the metadata together at the start of the file,\n",
+ " so it is quick to see what the file contains.\n",
+ " \"\"\"\n",
+ " comp_fields = [\"gain\", \"patterns\", \"pixels_classified\"]\n",
+ " img_grp = ofile[data_path]\n",
+ "\n",
+ " for field, data in corr_arrays.items():\n",
+ " kw = dict(chunks=(chunk_size_idim, pixels_x, pixels_y))\n",
+ " if field in comp_fields:\n",
+ " kw[\"compression\"] = \"gzip\"\n",
+ "\n",
+ " img_grp.create_dataset(\n",
+ " field, shape=data.shape, dtype=data.dtype, **kw)\n",
+ "\n",
+ " for field, data in corr_arrays.items():\n",
+ " img_grp[field][:] = data"
 ]
},
{
@@ -712,60 +709,79 @@
 "\n",
 " print(f\"Correcting file: {seq_f} of shape {data_shape}.\")\n",
 "\n",
- " data_dc = f_dc.select(\n",
- " instrument_src, \"data.image\",\n",
- " require_all=True).select_trains(np.s_[:n_trains])\n",
+ " data_dc = f_dc.select(instrument_src, \"data.image\", require_all=True).select_trains(np.s_[:n_trains]) # noqa\n",
+ "\n",
 " raw_data = data_dc[instrument_src, \"data.image\"].ndarray().astype(np.float32)\n",
+ "\n",
 " if seq_n == 0:\n",
 " raw_plt = raw_data.copy() # plot first sequence only\n",
- " # Allocating shared arrays for data arrays for each correction stage.\n",
- " data = context.alloc(shape=data_shape, dtype=np.float32)\n",
 "\n",
 " step_timer.start()\n",
- " bpix_data = context.alloc(shape=data_shape)\n",
 "\n",
+ " # Allocating shared arrays for data arrays for each correction stage.\n",
+ " data = context.alloc(shape=data_shape, dtype=np.float32)\n",
+ " bpix_data = context.alloc(shape=data_shape, dtype=np.uint32)\n",
+ " histCalRaw.fill(raw_data) # filling histogram with raw uncorrected data\n",
+ " \n",
+ " # Applying offset correction\n",
 " context.map(offset_correction, raw_data)\n",
+ " histCalOffsetCor.fill(data) # filling histogram with offset corrected data\n",
 "\n",
 " if seq_n == 0:\n",
 " off_data = data.copy() # plot first sequence only\n",
 "\n",
 "\n",
 " corr_arrays = {\n",
- " \"pixels\": off_data.astype(np.float32),\n",
- " \"mask\": bpix_data.astype(np.uint32), \n",
+ " \"pixels\": data.copy(),\n",
+ " \"mask\": bpix_data, \n",
 " }\n",
 " step_timer.done_step(f'offset correction.')\n",
 "\n",
 " if corr_bools.get('common_mode'):\n",
 " step_timer.start()\n",
+ "\n",
+ " # Applying common mode correction\n",
 " context.map(common_mode, data)\n",
 " if seq_n == 0:\n",
 " cm_data = data.copy() # plot first sequence only\n",
- " corr_arrays[\"pixels_cm\"] = cm_data.astype(np.float32)\n",
+ " corr_arrays[\"pixels_cm\"] = data.copy()\n",
+ " 
histCalCommonModeCor.fill(data) # filling histogram with common mode corrected data\n",
+ "\n",
 " step_timer.done_step(f'common-mode correction.')\n",
 "\n",
 " if corr_bools.get('relgain'):\n",
+ "\n",
 " step_timer.start()\n",
+ "\n",
+ " # Applying gain correction\n",
 " context.map(gain_correction, data)\n",
 " if seq_n == 0:\n",
 " rg_data = data.copy() # plot first sequence only\n",
- " corr_arrays[\"gain\"] = np.repeat(\n",
- " rg[np.newaxis, ...],\n",
- " n_trains, axis=0).astype(np.float32) # TODO: confirm that we need to keep saving a constant\n",
+ " # TODO: Why store a repeated constant for each image in corrected files?\n",
+ " corr_arrays[\"gain\"] = np.repeat(relativegain[np.newaxis, ...], n_trains, axis=0).astype(np.float32) # noqa\n",
+ " histCalGainCor.fill(data) # filling histogram with gain corrected data\n",
 " step_timer.done_step(f'gain correction.')\n",
 "\n",
 " if corr_bools.get('pattern_class'):\n",
 " step_timer.start()\n",
 "\n",
 " ptrn_data = context.alloc(shape=data_shape, dtype=np.int32)\n",
+ " filtered_data = context.alloc(shape=data_shape, dtype=np.float32)\n",
+ " # Applying pattern classification correction\n",
 " # Even though data is indeed of dtype np.float32,\n",
 " # not specifying this again screws with the data quality.\n",
- " context.map(pattern_classification, data.astype(np.float32))\n",
+ " context.map(pattern_classification_correction, data.astype(np.float32))\n",
 "\n",
 " if seq_n == 0:\n",
 " cls_data = data.copy() # plot first sequence only\n",
 " # split event corrected images plotted for first sequence only\n",
 " # (also these events are only singles events):\n",
- " corr_arrays[\"pixels_classified\"] = cls_data.astype(np.float32)\n",
+ " corr_arrays[\"pixels_classified\"] = data.copy()\n",
 " corr_arrays[\"patterns\"] = ptrn_data\n",
+ "\n",
+ " histCalPcorr.fill(data) # filling histogram with split events corrected data\n",
+ " # filling histogram with corr data after discarding doubles, triples, quadruples, clusters, and first singles\n",
+ " histCalPcorrS.fill(filtered_data)\n",
 " step_timer.done_step(f'pattern classification correction.')\n",
 "\n",
 " step_timer.start()\n",
@@ -778,11 +794,9 @@
 " sfile, ofile,\n",
 " [\"INSTRUMENT/\"+instrument_src+\"/data/image\"],\n",
 " )\n",
- " for field, arr in corr_arrays.items():\n",
- " write_dataset(\n",
- " ofile, arr, field,\n",
- " kw=kw if field in comp_fields else {})\n",
 " # TODO: to clear this up: why save corrected data in data/pixels rather than data/image.\n",
+ " write_datasets(corr_arrays, ofile)\n",
+ "\n",
 " step_timer.done_step(f'Storing data.')"
 ]
},
@@ -801,6 +815,21 @@
 "step_timer.print_summary()"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
 "print(\"In addition to offset correction, the following corrections were performed:\")\n",
 "for k, v in corr_bools.items():\n",
 " if v:\n",
 " print(\" -\", k.upper())\n",
 "\n",
 "print(f\"Total processing time {step_timer.timespan():.01f} s\")\n",
 "step_timer.print_summary()"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
@@ -1221,9 +1250,7 @@
 " doubles = []\n",
 " triples = []\n",
 " quads = []\n",
- " with H5File(\n",
- " f\"{out_folder}/{path_template.format(run, karabo_da, sequences[0]).replace('RAW', 'CORR')}\"\n",
- " ) as dc:\n",
+ " with H5File(f\"{out_folder}/{seq_files[0].name.replace('RAW', 'CORR')}\") as dc: # noqa\n",
 " data = dc[instrument_src, \"data.pixels_classified\"].ndarray()\n",
 " patterns = dc[instrument_src, \"data.patterns\"].ndarray()\n",
 " # 
events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),\n", -- GitLab
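
Notes on the patterns used in this MR (illustrative sketches, not part of the patch itself):

1) Shared-memory threading with pasha. Both notebooks allocate output arrays once via
context.alloc() and let each worker write one image slice; this MR additionally moves every
histCal*.fill() call out of the per-image kernels and fills each histogram once on the full
array after context.map() returns, so the histogram objects are never touched concurrently.
A minimal runnable sketch of that pattern; the random images, the offset map, and NumPy's
histogram (standing in for xcal's HistogramCalculator) are all invented stand-ins, while the
psh calls mirror the notebook code above:

    import numpy as np
    import pasha as psh

    # Illustrative stand-ins for the real constants and detector data:
    images = np.random.default_rng(0).normal(1000., 10., (50, 1024, 1024)).astype(np.float32)
    offset_map = np.full((1024, 1024), 1000., dtype=np.float32)

    context = psh.context.ThreadContext(num_workers=10)
    corrected = context.alloc(shape=images.shape, dtype=np.float32)

    def correct_image(wid, idx, d):
        # Each worker corrects one image and writes into the shared array.
        corrected[idx, ...] = d - offset_map

    context.map(correct_image, images)

    # Fill the histogram once, over all images, after the parallel map:
    counts, edges = np.histogram(corrected, bins=100, range=(-50., 50.))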
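2) Common-mode correction. The correction notebook delegates this to xcal.CommonModeCorrection,
configured with commonModeBlockSize and commonModeAxis. As a rough intuition only -- the xcal
implementation additionally uses the noise map, a minFrac cut and block-wise processing -- the
core idea is subtracting a robust per-row or per-column baseline:

    import numpy as np

    def naive_common_mode(image, axis=0):
        # One baseline per row (axis=0) or per column (axis=1), estimated with
        # the NaN-aware median so masked pixels are ignored. Simplified sketch,
        # not the xcal algorithm.
        baseline = np.nanmedian(image, axis=1 - axis, keepdims=True)
        return image - baseline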
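3) NaN markers and integer arrays. pattern_classification_correction() marks every non-single
event with np.nan before writing into filtered_data, so the destination array needs a
floating-point dtype: NaN is not representable in an integer array, and the cast corrupts the
markers that histCalPcorrS later relies on. A small demonstration of the hazard:

    import numpy as np

    floats = np.array([1.0, np.nan, 3.0], dtype=np.float32)

    ints = np.zeros(3, dtype=np.int32)
    ints[...] = floats  # RuntimeWarning on recent NumPy; the NaN becomes an
    print(ints)         # implementation-defined value, e.g. [1 -2147483648 3]

    safe = np.zeros(3, dtype=np.float32)
    safe[...] = floats  # a float target keeps the NaN markers intact
    print(safe)         # [ 1. nan  3.]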
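4) Create-then-fill HDF5 writing. write_datasets() creates every dataset before writing any
bulk data, so all of the file's metadata lands together near the start of the file and a
reader can list the contents cheaply. A self-contained h5py sketch of the same two-pass
pattern; the file name, group path and field names here are invented for illustration:

    import h5py
    import numpy as np

    corr_arrays = {
        "pixels": np.zeros((10, 1024, 1024), dtype=np.float32),
        "patterns": np.zeros((10, 1024, 1024), dtype=np.int32),
    }
    comp_fields = ["patterns"]  # heavier, compressible fields

    with h5py.File("example_corr.h5", "w") as ofile:
        img_grp = ofile.create_group("INSTRUMENT/EXAMPLE/data")

        # Pass 1: create all datasets, chunked per image, gzip where requested.
        for field, data in corr_arrays.items():
            kw = dict(chunks=(1,) + data.shape[1:])
            if field in comp_fields:
                kw["compression"] = "gzip"
            img_grp.create_dataset(field, shape=data.shape, dtype=data.dtype, **kw)

        # Pass 2: fill them with the actual data.
        for field, data in corr_arrays.items():
            img_grp[field][:] = data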