diff --git a/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb b/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb
new file mode 100755
index 0000000000000000000000000000000000000000..cce6da8569c7290c4989bd2963ab6d4725a0ef0b
--- /dev/null
+++ b/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb
@@ -0,0 +1,448 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4d04f210-fc4a-4b1d-9dfe-9c85b94b884f",
+   "metadata": {},
+   "source": [
+    "## Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "524fe654-e112-4abe-813c-a0be9b3a3034",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from time import time\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "from extra_data import open_run, RunDirectory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "392aa413-0035-4861-9f1f-03abb51d0613",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib notebook\n",
+    "# p900256  (stray token kept as comment: a bare name here would raise NameError)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3911b4e1-11af-4a51-be59-103ca9f6f261",
+   "metadata": {},
+   "source": [
+    "## Centroiding functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e36f997c-4b66-4b11-99a8-5887e3572f56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# centroiding\n",
+    "\n",
+    "import numpy as np\n",
+    "import scipy.ndimage as nd\n",
+    "from sklearn.cluster import DBSCAN\n",
+    "from warnings import warn\n",
+    "import traceback as tb\n",
+    "\n",
+    "error_msgs = {\n",
+    "    -1: \"tpx_data has an invalid structure - ignore provided data\",\n",
+    "    -2: \"tpx_data arrays are of invalid lengths - ignore provided data\",\n",
+    "    -3: \"tpx_data arrays are empty\"\n",
+    "    }\n",
+    "\n",
+    "def check_data(tpx_data):\n",
+    "    \"\"\"Validate that tpx_data has the required keys and equally sized, non-empty arrays; return 0 or a negative error code.\"\"\"\n",
+    "    required_keys = [\"x\", \"y\", \"tof\", \"tot\"]\n",
+    "    for key in required_keys:\n",
+    "        if key not in tpx_data:\n",
+    "            warn(\"tpx data must contain the keys %s, but key %s not in tpx data keys (%s)\" % (required_keys, key, list(tpx_data.keys())),\n",
+    "                 category=UserWarning)\n",
+    "            return -1\n",
+    "\n",
+    "    reference_n_samples_key = \"x\"\n",
+    "    n_samples = len(tpx_data[reference_n_samples_key])\n",
+    "    for key in tpx_data.keys():\n",
+    "        if n_samples != len(tpx_data[key]):\n",
+    "            warn(\"arrays in tpx data must be of same length ( len(tpx_data[%s])=%i!=%i=(len(tpx_data[%s]) )\" % (reference_n_samples_key, n_samples, len(tpx_data[key]), key),\n",
+    "                 category=UserWarning)\n",
+    "            return -2\n",
+    "    if n_samples == 0:\n",
+    "        warn(\"no samples were provided with tpx data\", category=UserWarning)\n",
+    "        return -3\n",
+    "    return 0\n",
+    "\n",
+    "def apply_single_filter(tpx_data, _filter):\n",
+    "    \"\"\"\n",
+    "    Simple function to apply a selecting or sorting filter to a dictionary of equally sized arrays\n",
+    "    Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    tpx_data: dictionary with timepix data, all arrays behind each key must be of same length\n",
+    "    _filter: 1d array or list of integers or booleans or np.s_ to select or sort data like a = a[_filter]\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tpx_data: like input tpx_data but with applied filter\n",
+    "\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        for key in tpx_data.keys():\n",
+    "            tpx_data[key] = np.array(tpx_data[key])[_filter]\n",
+    "    except Exception as e:\n",
+    "        # print(tpx_data)\n",
+    "        print(_filter)\n",
+    "        print(_filter.dtype)\n",
+    "        print(_filter.shape)\n",
+    "        print(tpx_data[key].shape)\n",
+    "        raise e\n",
+    "    return tpx_data\n",
+    "\n",
+    "def pre_clustering_filter(tpx_data, tot_threshold=None):\n",
+    "    \"\"\"\n",
+    "    Collection of filters directly applied before clustering.\n",
+    "    Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    tpx_data: dictionary with timepix data, all arrays behind each key must be of same length\n",
+    "    tot_threshold: minimum time-over-threshold; samples below it are dropped (None disables the filter)\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tpx_data: like input tpx_data but with applied filters\n",
+    "    \"\"\"\n",
+    "    if tot_threshold is not None:\n",
+    "        tpx_data = apply_single_filter(tpx_data, tpx_data[\"tot\"] >= tot_threshold)\n",
+    "\n",
+    "    return tpx_data\n",
+    "\n",
+    "def post_clustering_filter(tpx_data):\n",
+    "    \"\"\"\n",
+    "    Collection of filters directly applied after clustering.\n",
+    "    Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    tpx_data: dictionary with timepix data, all arrays behind each key must be of same length, now with key labels\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tpx_data: like input tpx_data but with applied filters\n",
+    "    \"\"\"\n",
+    "    if tpx_data[\"labels\"] is not None:\n",
+    "        tpx_data = apply_single_filter(tpx_data, tpx_data[\"labels\"] != 0)\n",
+    "\n",
+    "    return tpx_data\n",
+    "\n",
+    "def clustering(tpx_data, epsilon=2, tof_scale=1e7, min_samples=3, n_jobs=1):\n",
+    "    \"\"\"\n",
+    "    Identify hit clusters in (x, y, tof*tof_scale) space with DBSCAN.\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    tpx_data: dictionary with x, y, tof, tot arrays of equal length\n",
+    "    epsilon: DBSCAN neighbourhood radius in the scaled coordinate space\n",
+    "    tof_scale: factor applied to tof so it is comparable to pixel coordinates\n",
+    "    min_samples: minimum number of samples required to form a cluster\n",
+    "    n_jobs: number of parallel jobs used by DBSCAN\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    labels: integer cluster id per sample (DBSCAN labels shifted by +1 so noise is 0)\n",
+    "    \"\"\"\n",
+    "    coords = np.column_stack((tpx_data[\"x\"], tpx_data[\"y\"], tpx_data[\"tof\"]*tof_scale))\n",
+    "    dist = DBSCAN(eps=epsilon, min_samples=min_samples, metric=\"euclidean\", n_jobs=n_jobs).fit(coords)\n",
+    "    return dist.labels_ + 1\n",
+    "\n",
+    "def empty_centroid_data():\n",
+    "    \"\"\"Return a centroid dictionary with every key produced by get_centroids, all holding empty arrays.\"\"\"\n",
+    "    return {\n",
+    "        \"x\": np.array([]),\n",
+    "        \"y\": np.array([]),\n",
+    "        \"tof\": np.array([]),\n",
+    "        \"tot\": np.array([]),\n",
+    "        \"tot_avg\": np.array([]),\n",
+    "        \"tot_max\": np.array([]),\n",
+    "        \"size\": np.array([]),\n",
+    "    }\n",
+    "\n",
+    "def get_centroids(tpx_data, timewalk_lut=None):\n",
+    "    \"\"\"Reduce each labeled cluster to one centroid: tot-weighted x/y/tof plus intensity and size information.\"\"\"\n",
+    "    centroid_data = empty_centroid_data()\n",
+    "    cluster_labels, cluster_size = np.unique(tpx_data[\"labels\"], return_counts=True)\n",
+    "\n",
+    "    cluster_tot_peaks = np.array(nd.maximum_position(tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels)).ravel()\n",
+    "    cluster_tot_integrals = nd.sum(tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels)\n",
+    "\n",
+    "    # compute centroid center through weighted average\n",
+    "    centroid_data[\"x\"] = np.array(nd.sum(tpx_data[\"x\"] * tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels) / cluster_tot_integrals).ravel()\n",
+    "    centroid_data[\"y\"] = np.array(nd.sum(tpx_data[\"y\"] * tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels) / cluster_tot_integrals).ravel()\n",
+    "    centroid_data[\"tof\"] = np.array(nd.sum(tpx_data[\"tof\"] * tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels) / cluster_tot_integrals).ravel()\n",
+    "\n",
+    "    # intensity & size information\n",
+    "    centroid_data[\"tot_avg\"] = np.array(nd.mean(tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels))\n",
+    "    centroid_data[\"tot_max\"] = tpx_data[\"tot\"][cluster_tot_peaks]\n",
+    "    centroid_data[\"tot\"] = np.array(cluster_tot_integrals)\n",
+    "    centroid_data[\"size\"] = cluster_size\n",
+    "\n",
+    "    # train ID information\n",
+    "    # ~ centroid_data[\"tid\"] = tpx_data[\"tid\"][cluster_tot_peaks]\n",
+    "\n",
+    "    # correct for timewalk if provided\n",
+    "    if timewalk_lut is not None:\n",
+    "        centroid_data[\"tof\"] -= timewalk_lut[np.int_(centroid_data[\"tot_max\"] // 25) - 1] * 1e3\n",
+    "    return centroid_data\n",
+    "\n",
+    "def compute_centroids(x, y, tof, tot,\n",
+    "                      threshold_tot=None,\n",
+    "                      clustering_epsilon=2,\n",
+    "                      clustering_tof_scale=1e7,\n",
+    "                      clustering_min_samples=3,\n",
+    "                      clustering_n_jobs=1,\n",
+    "                      centroiding_timewalk_lut=None):\n",
+    "    \"\"\"Full pipeline: validate, filter, cluster and centroid raw timepix hits; returns a centroid dict or None on invalid input.\"\"\"\n",
+    "    # format input data\n",
+    "    _tpx_data = {\n",
+    "        \"x\": x,\n",
+    "        \"y\": y,\n",
+    "        \"tof\": tof,\n",
+    "        \"tot\": tot\n",
+    "    }\n",
+    "\n",
+    "    # ensure that valid data is available\n",
+    "    data_validation = check_data(_tpx_data)\n",
+    "    if data_validation < 0:\n",
+    "        if data_validation in error_msgs:\n",
+    "            print(\"Data validation failed with message: %s\" % error_msgs[data_validation])\n",
+    "        else:\n",
+    "            print(\"Data validation failed: unknown reason\")\n",
+    "        return None\n",
+    "\n",
+    "    # clustering (identify clusters in 2d data (x,y,tof) that belong to a single hit,\n",
+    "    # each sample belonging to a cluster is labeled with an integer cluster id no)\n",
+    "    _tpx_data = pre_clustering_filter(_tpx_data, tot_threshold=threshold_tot)\n",
+    "    _tpx_data[\"labels\"] = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples, n_jobs=clustering_n_jobs)\n",
+    "    _tpx_data = post_clustering_filter(_tpx_data)\n",
+    "    # compute centroid data (reduce cluster of samples to a single point with properties)\n",
+    "    if _tpx_data[\"labels\"] is None or _tpx_data[\"labels\"].size == 0:\n",
+    "        # handle case of no identified clusters, return empty dictionary with expected keys\n",
+    "        return empty_centroid_data()\n",
+    "    _centroids = get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)\n",
+    "    return _centroids"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f54e558-d862-48af-86b9-7d9e3d53fd3d",
+   "metadata": {},
+   "source": [
+    "## Output Function (for illustration) - instead data should be written to output file train-id wise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "83acf826-e351-4a39-a870-6c1e0298e711",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def output(train_id, centroids, fixed_len=None):\n",
+    "    \"\"\"\n",
+    "    In this version the resulting vectors within a run vary in length,\n",
+    "    if fixed_len is provided fixed length vectors are generated; returns the assembled dict\n",
+    "    \"\"\"\n",
+    "    if fixed_len is None:\n",
+    "        out = {\n",
+    "            \"%s[data.trainId]\"%event_output_key : train_id,\n",
+    "            \"%s[data.x]\"%event_output_key : centroids[\"x\"],\n",
+    "            \"%s[data.y]\"%event_output_key : centroids[\"y\"],\n",
+    "            \"%s[data.toa]\"%event_output_key : centroids[\"tof\"],\n",
+    "            \"%s[data.tot_avg]\"%event_output_key : centroids[\"tot_avg\"],\n",
+    "            \"%s[data.tot_max]\"%event_output_key : centroids[\"tot_max\"],\n",
+    "            \"%s[data.size]\"%event_output_key : centroids[\"size\"],\n",
+    "        }\n",
+    "    else:\n",
+    "        if len(centroids[\"x\"])>fixed_len:\n",
+    "            sel = np.s_[:fixed_len]\n",
+    "            out = {\n",
+    "                \"%s[data.trainId]\"%event_output_key : train_id,\n",
+    "                \"%s[data.x]\"%event_output_key : centroids[\"x\"][sel],\n",
+    "                \"%s[data.y]\"%event_output_key : centroids[\"y\"][sel],\n",
+    "                \"%s[data.toa]\"%event_output_key : centroids[\"tof\"][sel],\n",
+    "                \"%s[data.tot_avg]\"%event_output_key : centroids[\"tot_avg\"][sel],\n",
+    "                \"%s[data.tot_max]\"%event_output_key : centroids[\"tot_max\"][sel],\n",
+    "                \"%s[data.centroid_size]\"%event_output_key : centroids[\"size\"][sel],\n",
+    "                \"%s[data.size]\"%event_output_key : len(centroids[\"x\"]),\n",
+    "            }\n",
+    "        else:\n",
+    "            pad = fixed_len - len(centroids[\"x\"])\n",
+    "            out = {\n",
+    "                \"%s[data.trainId]\"%event_output_key : train_id, # int 0d\n",
+    "                \"%s[data.x]\"%event_output_key : np.pad(centroids[\"x\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.y]\"%event_output_key : np.pad(centroids[\"y\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.toa]\"%event_output_key : np.pad(centroids[\"tof\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.tot_avg]\"%event_output_key : np.pad(centroids[\"tot_avg\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.tot_max]\"%event_output_key : np.pad(centroids[\"tot_max\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.centroid_size]\"%event_output_key : np.pad(centroids[\"size\"], (0,pad), constant_values=1), # int 1d\n",
+    "                \"%s[data.size]\"%event_output_key : len(centroids[\"x\"]), # int 0d\n",
+    "            }\n",
+    "    return out"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a39ce0e4-31b3-40c7-9c80-72b89d037714",
+   "metadata": {},
+   "source": [
+    "## Settings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8318bc3f-01be-4144-a4de-ebd736563396",
+   "metadata": {},
+   "source": [
+    "### Data selection (for dev)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "693c72cd-9c67-46d6-a0bc-438126e62642",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data selection parameters.\n",
+    "run_id = 420\n",
+    "proposal = 900256\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "13956e56-d818-42e4-aa73-7f9c4a897732",
+   "metadata": {},
+   "source": [
+    "### Parameters to be exposed with calibration pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a9adf469-32de-43b4-91fa-2508a952d0dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Centroiding and output parameters that should be easily adjustable\n",
+    "max_n_centroids = 10000\n",
+    "\n",
+    "clustering_epsilon = 2\n",
+    "clustering_tof_scale = 1e7\n",
+    "clustering_min_samples = 3\n",
+    "clustering_n_jobs = 1\n",
+    "threshold_tot = None\n",
+    "raw_timewalk_lut_filepath = None # fpath to look up table for timewalk correction or None\n",
+    "centroiding_timewalk_lut_filepath = None # fpath to look up table for timewalk correction or None\n",
+    "\n",
+    "# raw_timewalk_lut_filepath = \"timewalk_raw.npy\" # fpath to look up table for timewalk correction or None\n",
+    "# centroiding_timewalk_lut_filepath = \"timewalk_cent.npy\" # fpath to look up table for timewalk correction or None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c6dff850-18c8-4d5a-9362-e581cce18d8e",
+   "metadata": {},
+   "source": [
+    "## Centroiding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "56306c15-513e-4e7f-9c47-c52ca61b27a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run = open_run(run=run_id, proposal=proposal)\n",
+    "event_output_key = 'SQS_AQS_CAM/CAM/TIMEPIX3:daqEventOutput'\n",
+    "\n",
+    "if raw_timewalk_lut_filepath is not None:\n",
+    "    raw_timewalk_lut = np.load(raw_timewalk_lut_filepath)\n",
+    "else:\n",
+    "    raw_timewalk_lut = None\n",
+    "\n",
+    "if centroiding_timewalk_lut_filepath is not None:\n",
+    "    centroiding_timewalk_lut = np.load(centroiding_timewalk_lut_filepath)\n",
+    "else:\n",
+    "    centroiding_timewalk_lut = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c28a4c8-961c-496b-80da-7fd867e5b0d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t00 = time()\n",
+    "for train_id, data in run.select(event_output_key, \"data.*\").trains():\n",
+    "    t0 = time()\n",
+    "    sel = np.s_[:data[event_output_key][\"data.size\"]]\n",
+    "    \n",
+    "    tot = np.array(data[event_output_key][\"data.tot\"][sel])\n",
+    "    toa = np.array(data[event_output_key][\"data.toa\"][sel])\n",
+    "    \n",
+    "    if raw_timewalk_lut is not None:\n",
+    "        toa -= raw_timewalk_lut[np.int_(tot // 25) - 1] * 1e3\n",
+    "    \n",
+    "    centroids = compute_centroids(data[event_output_key][\"data.x\"][sel], \n",
+    "                                  data[event_output_key][\"data.y\"][sel], \n",
+    "                                  toa, \n",
+    "                                  tot,\n",
+    "                                  threshold_tot=threshold_tot,\n",
+    "                                  clustering_epsilon=clustering_epsilon,\n",
+    "                                  clustering_tof_scale=clustering_tof_scale,\n",
+    "                                  clustering_min_samples=clustering_min_samples,\n",
+    "                                  clustering_n_jobs=clustering_n_jobs,\n",
+    "                                  centroiding_timewalk_lut=centroiding_timewalk_lut)\n",
+    "    \n",
+    "#     print(time()-t0)\n",
+    "    output(train_id, centroids, fixed_len=max_n_centroids)\n",
+    "#     if time()-t00 > 0:\n",
+    "#         break\n",
+    "print(time()-t00)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pycal",
+   "language": "python",
+   "name": "pycal"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}