
feat[Epix100][Correct]: New corrected data source and a link to old data source

Merged: Karim Ahmed requested to merge feat/epix_new_correct_souce into master
@@ -37,7 +37,8 @@
 "db_module = \"\" # module id in the database\n",
 "receiver_template = \"RECEIVER\" # detector receiver template for accessing raw data files\n",
 "path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data\n",
-"instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files\n",
+"input_source_template = '{karabo_id}/DET/{receiver}:daqOutput' # input(raw) detector data source in h5files\n",
+"output_source_template = '{karabo_id}/CORR/{receiver}:daqOutput' # output(corrected) detector data source in h5files\n",
 "\n",
 "# Parameters affecting writing corrected data.\n",
 "chunk_size_idim = 1 # H5 chunking size of output data\n",
@@ -86,7 +87,6 @@
 "from logging import warning\n",
 "from sys import exit\n",
 "\n",
-"import h5py\n",
 "import pasha as psh\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",
@@ -145,11 +145,14 @@
 "\n",
 "run_folder = in_folder / f\"r{run:04d}\"\n",
 "\n",
-"instrument_src = instrument_source_template.format(\n",
-" karabo_id, receiver_template)\n",
+"output_source_template = output_source_template or input_source_template\n",
+"\n",
+"input_src = input_source_template.format(\n",
+" karabo_id=karabo_id, receiver=receiver_template)\n",
+"output_src = output_source_template.format(\n",
+" karabo_id=karabo_id, receiver=receiver_template)\n",
 "\n",
 "print(f\"Correcting run: {run_folder}\")\n",
-"print(f\"Instrument H5File source: {instrument_src}\")\n",
 "print(f\"Data corrected files are stored at: {out_folder}\")"
 ]
 },
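The new fallback line relies on `or` returning its first truthy operand: if a caller blanks `output_source_template`, it falls back to `input_source_template` and corrected data is written under the same source name it was read from, which is also what makes the `output_src != input_src` check near the end of this diff meaningful. A small sketch of the idiom:

```python
# `or` yields the first truthy operand, so an empty string triggers the fallback.
input_source_template = '{karabo_id}/DET/{receiver}:daqOutput'

output_source_template = ''  # e.g. explicitly blanked by the caller
output_source_template = output_source_template or input_source_template

print(output_source_template)  # {karabo_id}/DET/{receiver}:daqOutput
```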
@@ -211,7 +214,7 @@
 "# Read control data.\n",
 "ctrl_data = epix100lib.epix100Ctrl(\n",
 " run_dc=run_dc,\n",
-" instrument_src=instrument_src,\n",
+" instrument_src=input_src,\n",
 " ctrl_src=f\"{karabo_id}/DET/CONTROL\",\n",
 " )\n",
 "\n",
@@ -557,7 +560,7 @@
 " out_file = out_folder / f.name.replace(\"RAW\", \"CORR\")\n",
 "\n",
 " # Data shape in seq_dc excluding trains with empty images. \n",
-" ishape = seq_dc[instrument_src, \"data.image.pixels\"].shape\n",
+" ishape = seq_dc[input_src, \"data.image.pixels\"].shape\n",
 " corr_ntrains = ishape[0]\n",
 " all_train_ids = seq_dc.train_ids\n",
 "\n",
@@ -587,9 +590,9 @@
 "\n",
 " # Overwrite seq_dc after eliminating empty trains or/and applying limited images.\n",
 " seq_dc = seq_dc.select(\n",
-" instrument_src, \"*\", require_all=True).select_trains(np.s_[:corr_ntrains])\n",
+" input_src, \"*\", require_all=True).select_trains(np.s_[:corr_ntrains])\n",
 "\n",
-" pixel_data = seq_dc[instrument_src, \"data.image.pixels\"]\n",
+" pixel_data = seq_dc[input_src, \"data.image.pixels\"]\n",
 " context.map(correct_train, pixel_data)\n",
 "\n",
 " step_timer.done_step(f'Correcting {corr_ntrains} trains.')\n",
@@ -597,7 +600,7 @@
 " step_timer.start() # Write corrected data.\n",
 "\n",
 " # Create CORR files and add corrected data sections.\n",
-" image_counts = seq_dc[instrument_src, \"data.image.pixels\"].data_counts(labelled=False)\n",
+" image_counts = seq_dc[input_src, \"data.image.pixels\"].data_counts(labelled=False)\n",
 "\n",
 " # Write corrected data.\n",
 " with DataFile(out_file, \"w\") as ofile:\n",
@@ -605,49 +608,52 @@
 " seq_file = seq_dc.files[0] # FileAccess\n",
 " # Create INDEX datasets.\n",
 " ofile.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])\n",
-" # Create METDATA datasets\n",
+" # Create METADATA datasets\n",
 " ofile.create_metadata(\n",
 " like=seq_dc,\n",
 " sequence=seq_file.sequence,\n",
-" instrument_channels=(f'{instrument_src}/data',)\n",
+" instrument_channels=sorted({f'{output_src}/data',f'{input_src}/data'})\n",
 " )\n",
 " # Create Instrument section to later add corrected datasets.\n",
-" outp_source = ofile.create_instrument_source(instrument_src)\n",
+" instr_src_group = ofile.create_instrument_source(output_src)\n",
 "\n",
 " # Create count/first datasets at INDEX source.\n",
-" outp_source.create_index(data=image_counts)\n",
+" instr_src_group.create_index(data=image_counts)\n",
 "\n",
 " image_raw_fields = [ # /data/image/\n",
 " \"binning\", \"bitsPerPixel\", \"dimTypes\", \"dims\",\n",
 " \"encoding\", \"flipX\", \"flipY\", \"roiOffsets\", \"rotation\",\n",
 " ]\n",
 " for field in image_raw_fields:\n",
-" field_arr = seq_dc[instrument_src, f\"data.image.{field}\"].ndarray()\n",
+" field_arr = seq_dc[input_src, f\"data.image.{field}\"].ndarray()\n",
 "\n",
-" outp_source.create_key(\n",
+" instr_src_group.create_key(\n",
 " f\"data.image.{field}\", data=field_arr,\n",
 " chunks=(chunk_size_idim, *field_arr.shape[1:]))\n",
 "\n",
 " # Add main corrected `data.image.pixels` dataset and store corrected data.\n",
-" outp_source.create_key(\n",
+" instr_src_group.create_key(\n",
 " \"data.image.pixels\", data=data, chunks=dataset_chunk)\n",
-" outp_source.create_key(\n",
+" instr_src_group.create_key(\n",
 " \"data.trainId\", data=seq_dc.train_ids, chunks=min(50, len(seq_dc.train_ids)))\n",
 " \n",
-" if np.isin('data.pulseId', list(seq_dc[instrument_src].keys())): # some runs are missing 'data.pulseId'\n",
-" outp_source.create_key(\n",
+" if 'data.pulseId' in seq_dc[input_src].keys(): # some runs are missing 'data.pulseId'\n",
+" instr_src_group.create_key(\n",
 " \"data.pulseId\",\n",
-" data=list(seq_dc[instrument_src]['data.pulseId'].ndarray()[:, 0]),\n",
+" data=seq_dc[input_src]['data.pulseId'].ndarray()[:, 0],\n",
 " chunks=min(50, len(seq_dc.train_ids)),\n",
 " )\n",
 " \n",
 " if pattern_classification:\n",
 " # Add main corrected `data.image.pixels` dataset and store corrected data.\n",
-" outp_source.create_key(\n",
+" instr_src_group.create_key(\n",
 " \"data.image.pixels_classified\", data=data_clu, chunks=dataset_chunk)\n",
-" outp_source.create_key(\n",
+" instr_src_group.create_key(\n",
 " \"data.image.patterns\", data=data_patterns, chunks=dataset_chunk)\n",
 "\n",
+" if output_src != input_src:\n",
+" ofile.create_legacy_source(input_src, output_src)\n",
+"\n",
 " step_timer.done_step('Storing data.')\n",
 "if empty_seq == len(seq_files):\n",
 " warning(\"No valid trains for RAW data to correct.\")\n",