diff --git a/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb b/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
index ece51d23ea9ae3d9d61985df6040881beb56ebc1..beec4c5bc6f78539169eadba38d6059c17374899 100644
--- a/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
+++ b/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
@@ -28,7 +28,7 @@
     "karabo_id = \"SQS_NQS_PNCCD1MP\" # karabo prefix of PNCCD devices\n",
     "receiver_id = \"PNCCD_FMT-0\" # inset for receiver devices\n",
     "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
-    "instrument_source_template = '{}/CAL/{}:output' # template for data source name, will be filled with karabo_id and receiver_id.\n",
+    "input_source_template = '{karabo_id}/CAL/{receiver}:output' # template for input (raw) data source name, will be filled with karabo_id and receiver_id.\n",
     "\n",
     "# Parameters affecting data correction.\n",
     "commonModeAxis = 0 # axis along which common mode will be calculated, 0 = row, and 1 = column\n",
@@ -36,6 +36,7 @@
     "split_evt_primary_threshold = 4. # primary threshold for split event classification in terms of n sigma noise\n",
     "split_evt_secondary_threshold = 3. # secondary threshold for split event classification in terms of n sigma noise\n",
     "saturated_threshold = 32000. # full well capacity in ADU\n",
+    "output_source_template = '{karabo_id}/CORR/{receiver}:output' # template for output (corrected) data source name, will be filled with karabo_id and receiver_id.\n",
     "\n",
     "# Conditions for retrieving calibration constants\n",
     "fix_temperature_top = 0. # fix temperature for top sensor in K, set to 0. to use value from slow data.\n",
@@ -141,9 +142,14 @@
     "print(f\"pnCCD size is: {pixels_x}x{pixels_y} pixels.\")\n",
     "print(f'Calibration database interface selected: {cal_db_interface}')\n",
     "\n",
+    "output_source_template = output_source_template or input_source_template\n",
+    "\n",
     "# Paths to the data:\n",
-    "instrument_src = instrument_source_template.format(karabo_id, receiver_id)\n",
-    "print(f\"Instrument H5File source: {instrument_src}\\n\")\n",
+    "input_src = input_source_template.format(\n",
+    "    karabo_id=karabo_id, receiver=receiver_id)\n",
+    "output_src = output_source_template.format(\n",
+    "    karabo_id=karabo_id, receiver=receiver_id)\n",
+    "print(f\"Instrument raw source: {input_src}\\n\")\n",
     "\n",
     "# Run's creation time:\n",
     "creation_time = calcat_creation_time(in_folder, run, creation_time)\n",
@@ -200,7 +206,7 @@
    "outputs": [],
    "source": [
     "seq_files = []\n",
-    "for f in run_dc.select(instrument_src).files:\n",
+    "for f in run_dc.select(input_src).files:\n",
     "    fpath = Path(f.filename)\n",
     "    if fpath.match(f\"*{karabo_da}*.h5\"):\n",
     "        seq_files.append(fpath)\n",
@@ -589,8 +595,6 @@
     "parallel_num_threads = 10\n",
     "context = psh.context.ThreadContext(num_workers=parallel_num_threads)\n",
     "\n",
-    "data_path = \"INSTRUMENT/\"+instrument_src+\"/data/\"\n",
-    "\n",
     "offset = np.squeeze(constants[\"OffsetCCD\"])\n",
     "noise = np.squeeze(constants[\"NoiseCCD\"])\n",
     "bpix = np.squeeze(constants[\"BadPixelsDarkCCD\"])\n",
@@ -603,28 +607,28 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def write_datasets(seq_dc, corr_arrays, out_file, instrument_src):\n",
+    "def write_datasets(seq_dc, corr_arrays, out_file, input_src, output_src):\n",
     "    \"\"\"\n",
     "    Creating datasets first then adding data.\n",
     "    To have metadata together available at the start of the file,\n",
     "    so it's quick to see what the file contains.\n",
     "    \"\"\"\n",
     "    # Create CORR files and add corrected data sections.\n",
-    "    image_counts = seq_dc[instrument_src, \"data.image\"].data_counts(labelled=False)\n",
-    "    dataset_chunk = ((chunk_size_idim,) + corr_arrays[\"pixels\"].shape[1:]) # e.g. (1, pixels_x, pixels_y) \n",
+    "    image_counts = seq_dc[input_src, \"data.image\"].data_counts(labelled=False)\n",
+    "    dataset_chunk = ((chunk_size_idim,) + corr_arrays[\"image\"].shape[1:]) # e.g. (1, pixels_x, pixels_y) \n",
     "    with DataFile(out_file, 'w') as ofile:\n",
     "        seq_file = seq_dc.files[0]\n",
     "        # Create INDEX datasets.\n",
     "        ofile.create_index(seq_dc.train_ids, from_file=seq_file)\n",
-    "        # Create METDATA datasets\n",
+    "        # Create METADATA datasets\n",
     "        ofile.create_metadata(\n",
     "            like=seq_dc,\n",
     "            sequence=seq_file.sequence,\n",
-    "            instrument_channels=(f\"{instrument_src}/data\",)\n",
+    "            instrument_channels=(f\"{output_src}/data\",)\n",
     "        )\n",
     "\n",
     "        # Create Instrument section to later add corrected datasets.\n",
-    "        outp_source = ofile.create_instrument_source(instrument_src)\n",
+    "        outp_source = ofile.create_instrument_source(output_src)\n",
     "\n",
     "        # Create count/first datasets at INDEX source.\n",
     "        outp_source.create_index(data=image_counts)\n",
@@ -634,13 +638,8 @@
     "            f\"data.trainId\", data=seq_dc.train_ids,\n",
     "            chunks=min(50, len(seq_dc.train_ids))\n",
     "        )\n",
-    "        \n",
-    "        # TODO: gain dataset is just the RelativeGain constant\n",
-    "        # and it doesn't make sense to write it into corrected data.\n",
-    "        comp_fields = [\"gain\", \"patterns\", \"pixels_classified\"]\n",
     "\n",
-    "        # TODO: to clear this up: why save corrected data\n",
-    "        # in data/pixels rather than data/image.\n",
+    "        comp_fields = [\"patterns\", \"pixels_classified\"]\n",
     "        for field, data in corr_arrays.items():\n",
     "            if field in comp_fields: # Write compressed corrected data.\n",
     "                outp_source.create_compressed_key(f\"data.{field}\", data=data)\n",
@@ -666,9 +665,9 @@
     "\n",
     "    step_timer.start()\n",
     "\n",
-    "    img_dc = seq_dc[instrument_src, \"data.image\"]\n",
+    "    img_dc = seq_dc[input_src, \"data.image\"]\n",
     "\n",
-    "    dshape = seq_dc[instrument_src, \"data.image\"].shape\n",
+    "    dshape = img_dc.shape\n",
     "\n",
     "    n_trains = dshape[0]\n",
     "    corr_ntrains = dshape[0] # number of available trains to correct.\n",
@@ -699,10 +698,9 @@
     "\n",
     "    # Overwrite seq_dc after eliminating empty trains or/and applying limited images.\n",
     "    seq_dc = seq_dc.select(\n",
-    "        instrument_src, \"*\", require_all=True).select_trains(np.s_[:corr_ntrains])\n",
+    "        input_src, \"*\", require_all=True).select_trains(np.s_[:corr_ntrains])\n",
     "\n",
-    "    raw_data = seq_dc[instrument_src, \"data.image\"].ndarray().astype(np.float32)\n",
-    "    to_store_arrays = {\"image\": raw_data}\n",
+    "    raw_data = seq_dc[input_src, \"data.image\"].ndarray().astype(np.float32)\n",
     "\n",
     "    # TODO: move the parts for reading data to plot to later cells.\n",
     "    if seq_n == 0:\n",
@@ -722,7 +720,7 @@
     "    if seq_n == 0:\n",
     "        off_data = data.copy() # plot first sequence only\n",
     "\n",
-    "    to_store_arrays[\"pixels\"] = data.copy()\n",
+    "    to_store_arrays = {\"image\": data.copy()}\n",
     "    to_store_arrays[\"mask\"] = bpix_data\n",
     "\n",
     "    step_timer.done_step(f'offset correction.')\n",
@@ -747,8 +745,7 @@
     "    context.map(gain_correction, data)\n",
     "    if seq_n == 0:\n",
     "        rg_data = data.copy() # plot first sequence only\n",
-    "    # TODO: Why storing a repeated constant for each image in corrected files.\n",
-    "    to_store_arrays[\"gain\"] = np.repeat(relativegain[np.newaxis, ...], corr_ntrains, axis=0).astype(np.float32) # noqa\n",
+    "\n",
     "    histCalGainCor.fill(data) # filling histogram with gain corrected data\n",
     "    step_timer.done_step(f'gain correction.')\n",
     "\n",
@@ -781,7 +778,8 @@
     "        seq_dc=seq_dc,\n",
     "        corr_arrays=to_store_arrays,\n",
     "        out_file=out_file,\n",
-    "        instrument_src=instrument_src,\n",
+    "        input_src=input_src,\n",
+    "        output_src=output_src\n",
     "    )\n",
     "    step_timer.done_step(f'Storing data.')\n",
     "# Exit and raise warning if there are no data to correct for all sequences.\n",
@@ -1241,8 +1239,8 @@
     "    triples = []\n",
     "    quads = []\n",
     "    with H5File(f\"{out_folder}/{seq_files[0].name.replace('RAW', 'CORR')}\") as dc: # noqa\n",
-    "        data = dc[instrument_src, \"data.pixels_classified\"].ndarray()\n",
-    "        patterns = dc[instrument_src, \"data.patterns\"].ndarray()\n",
+    "        data = dc[output_src, \"data.pixels_classified\"].ndarray()\n",
+    "        patterns = dc[output_src, \"data.patterns\"].ndarray()\n",
     "    # events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),\n",
     "    # 300 - 303 (triples), and 400 - 403 (quadruples). Note that for the last three types of patterns, \n",
     "    # there are left, right, up, and down indices.\n",
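
Reviewer note, not part of the diff: after this change, corrected files expose their data under the `CORR` source name built from `output_source_template`, while raw data keeps the `CAL` name. Below is a minimal sketch of how downstream code would read the renamed source and tally the event classes documented in the last hunk. The sequence file name is a hypothetical placeholder, and the tallying itself is illustrative, not code from this PR:

```python
import numpy as np
from extra_data import H5File

# Hypothetical corrected sequence file name; adjust to your proposal/run.
corr_file = "CORR-R0347-PNCCD01-S00000.h5"
# Output source name as produced by output_source_template with the
# defaults above: '{karabo_id}/CORR/{receiver}:output'.
output_src = "SQS_NQS_PNCCD1MP/CORR/PNCCD_FMT-0:output"

with H5File(corr_file) as dc:
    # "data.patterns" is one of the compressed keys written by write_datasets().
    patterns = dc[output_src, "data.patterns"].ndarray()

# Pattern index convention from the notebook: 100 (singles), 101 (first
# singles), 200-203 (doubles), 300-303 (triples), 400-403 (quadruples).
counts = {
    "singles": int(np.isin(patterns, (100, 101)).sum()),
    "doubles": int(np.isin(patterns, np.arange(200, 204)).sum()),
    "triples": int(np.isin(patterns, np.arange(300, 304)).sum()),
    "quadruples": int(np.isin(patterns, np.arange(400, 404)).sum()),
}
print(counts)
```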