Skip to content
Snippets Groups Projects

[Timepix] Store labels assigned to pixels

Merged Philipp Schmidt requested to merge feat/timepix-labels into master
@@ -20,9 +20,9 @@
"outputs": [],
"source": [
"# Data selection parameters.\n",
"run = 420 # required\n",
"in_folder = '/gpfs/exfel/exp/SQS/202230/p900256/raw' # required\n",
"out_folder = '/gpfs/exfel/exp/SQS/202230/p900256/scratch/cal_test' # required\n",
"run = 307 # required\n",
"in_folder = '/gpfs/exfel/exp/SQS/202430/p900421/raw' # required\n",
"out_folder = '/gpfs/exfel/exp/SQS/202430/p900421/scratch/cal_test' # required\n",
"proposal = '' # Proposal, leave empty for auto detection based on in_folder\n",
"\n",
"# These parameters are required by xfel-calibrate but ignored in this notebook.\n",
@@ -141,44 +141,6 @@
" return tpx_data\n",
"\n",
"\n",
"def pre_clustering_filter(tpx_data, tot_threshold=0):\n",
" \"\"\"\n",
" Collection of filters directly applied before clustering.\n",
" Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n",
"\n",
" Parameters\n",
" ----------\n",
" tpx_data: Dictionary with timepix data, all arrays behind each key must be of same length\n",
" tot_threshold: minimum ToT required for a pixel to contain valid data\n",
"\n",
" Returns\n",
" -------\n",
" tpx_data: like input tpx_data but with applied filters\n",
" \"\"\"\n",
" if tot_threshold > 0:\n",
" tpx_data = apply_single_filter(tpx_data, tpx_data[\"tot\"] >= tot_threshold)\n",
"\n",
" return tpx_data\n",
"\n",
"\n",
"def post_clustering_filter(tpx_data):\n",
" \"\"\"\n",
" Collection of filters directly applied after clustering.\n",
" Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n",
"\n",
" Parameters\n",
" ----------\n",
" tpx_data: Dictionary with timepix data, all arrays behind each key must be of same length, now with key labels\n",
"\n",
" Returns\n",
" -------\n",
" tpx_data: like input tpx_data but with applied filters\n",
" \"\"\"\n",
" if tpx_data[\"labels\"] is not None:\n",
" tpx_data = apply_single_filter(tpx_data, tpx_data[\"labels\"] != 0)\n",
"\n",
" return tpx_data\n",
"\n",
"\n",
"def clustering(tpx_data, epsilon=2, tof_scale=1e7, min_samples=3, n_jobs=1):\n",
" \"\"\"\n",
@@ -203,7 +165,7 @@
" \"\"\"\n",
" coords = np.column_stack((tpx_data[\"x\"], tpx_data[\"y\"], tpx_data[\"toa\"]*tof_scale))\n",
" dist = DBSCAN(eps=epsilon, min_samples=min_samples, metric=\"euclidean\", n_jobs=n_jobs).fit(coords)\n",
" return dist.labels_ + 1\n",
" return dist.labels_\n",
"\n",
"def empty_centroid_data():\n",
" return {\n",
@@ -264,19 +226,24 @@
" print(\"Data validation failed with message: %s\" % error_msgs[data_validation])\n",
" else:\n",
" print(\"Data validation failed: unknown reason\")\n",
" return empty_centroid_data()\n",
" return np.array([]), empty_centroid_data()\n",
"\n",
" # clustering (identify clusters in 2d data (x,y,tof) that belong to a single hit,\n",
" # each sample belonging to a cluster is labeled with an integer cluster id no)\n",
" _tpx_data = pre_clustering_filter(_tpx_data, tot_threshold=threshold_tot)\n",
" _tpx_data[\"labels\"] = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples)\n",
" _tpx_data = post_clustering_filter(_tpx_data)\n",
" if threshold_tot > 0:\n",
" _tpx_data = apply_single_filter(_tpx_data, _tpx_data[\"tot\"] >= threshold_tot) \n",
"\n",
" labels = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples)\n",
" _tpx_data[\"labels\"] = labels\n",
" \n",
" if labels is not None:\n",
" _tpx_data = apply_single_filter(_tpx_data, labels >= 0)\n",
" \n",
" # compute centroid data (reduce cluster of samples to a single point with properties)\n",
" if _tpx_data[\"labels\"] is None or _tpx_data[\"labels\"].size == 0:\n",
" if labels is None or len(_tpx_data['x']) == 0:\n",
" # handle case of no identified clusters, return empty dictionary with expected keys\n",
" return empty_centroid_data()\n",
" _centroids = get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)\n",
" return _centroids\n",
" return np.array([]), empty_centroid_data()\n",
" return labels, get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)\n",
"\n",
"\n",
"def process_train(worker_id, index, train_id, data):\n",
@@ -292,7 +259,7 @@
" if raw_timewalk_lut is not None:\n",
" toa -= raw_timewalk_lut[np.int_(tot // 25) - 1] * 1e3\n",
"\n",
" centroids = compute_centroids(x, y, toa, tot, **centroiding_kwargs)\n",
" labels, centroids = compute_centroids(x, y, toa, tot, **centroiding_kwargs)\n",
"\n",
" num_centroids = len(centroids['x'])\n",
" fraction_centroids = np.sum(centroids[\"size\"])/events['data.size'] if events['data.size']>0 else np.nan\n",
@@ -303,6 +270,7 @@
"\n",
" for key in centroid_dt.names:\n",
" out_data[index, :num_centroids][key] = centroids[key]\n",
" out_labels[index, :len(labels)] = labels\n",
" out_stats[index][\"fraction_px_in_centroids\"] = fraction_centroids\n",
" out_stats[index][\"N_centroids\"] = num_centroids\n",
" out_stats[index][\"missing_centroids\"] = missing_centroids"
@@ -357,6 +325,7 @@
" ('tot_max', np.uint16),\n",
" ('size', np.int16)])\n",
"\n",
"pixel_shape = in_dc[in_fast_data]['data.x'].entry_shape\n",
"\n",
"centroid_settings_template = {\n",
" 'timewalk_correction.raw_applied': (np.bool, bool(raw_timewalk_lut_filepath)),\n",
@@ -401,10 +370,12 @@
" control_sources=[out_device_id],\n",
" instrument_channels=[f'{out_fast_data}/data'])\n",
" seq_file.create_index(train_ids)\n",
" \n",
" \n",
" out_labels = psh.alloc(shape=(len(train_ids),) + pixel_shape, dtype=np.int32)\n",
" out_data = psh.alloc(shape=(len(train_ids), max_num_centroids), dtype=centroid_dt)\n",
" out_stats = psh.alloc(shape=(len(train_ids),), dtype=centroid_stats_dt)\n",
" \n",
" out_labels[:] = -1\n",
" out_data[:] = (np.nan, np.nan, np.nan, np.nan, np.nan, 0, -1)\n",
" out_stats[:] = tuple([centroid_stats_template[key][1] for key in centroid_stats_template])\n",
" \n",
@@ -421,6 +392,8 @@
" for key, (type_, data) in centroid_settings_template.items():\n",
" cur_slow_data.create_run_key(f'settings.{key}', data)\n",
" \n",
" cur_fast_data.create_key('data.labels', data=out_labels,\n",
" chunks=(1,) + pixel_shape, **dataset_kwargs)\n",
" cur_fast_data.create_key('data.centroids', out_data,\n",
" chunks=tuple(chunks_centroids),\n",
" **dataset_kwargs)\n",
Loading