diff --git a/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb b/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb index 036bb35f7d417d5afa27d3fc66f80c3c55dabc7a..c84fcbb313e9b243dccce3aa4e6ee3df3c26a0f6 100755 --- a/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb +++ b/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb @@ -20,9 +20,9 @@ "outputs": [], "source": [ "# Data selection parameters.\n", - "run = 420 # required\n", - "in_folder = '/gpfs/exfel/exp/SQS/202230/p900256/raw' # required\n", - "out_folder = '/gpfs/exfel/exp/SQS/202230/p900256/scratch/cal_test' # required\n", + "run = 307 # required\n", + "in_folder = '/gpfs/exfel/exp/SQS/202430/p900421/raw' # required\n", + "out_folder = '/gpfs/exfel/exp/SQS/202430/p900421/scratch/cal_test' # required\n", "proposal = '' # Proposal, leave empty for auto detection based on in_folder\n", "\n", "# These parameters are required by xfel-calibrate but ignored in this notebook.\n", @@ -141,44 +141,6 @@ " return tpx_data\n", "\n", "\n", - "def pre_clustering_filter(tpx_data, tot_threshold=0):\n", - " \"\"\"\n", - " Collection of filters directly applied before clustering.\n", - " Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n", - "\n", - " Parameters\n", - " ----------\n", - " tpx_data: Dictionary with timepix data, all arrays behind each key must be of same length\n", - " tot_threshold: minimum ToT required for a pixel to contain valid data\n", - "\n", - " Returns\n", - " -------\n", - " tpx_data: like input tpx_data but with applied filters\n", - " \"\"\"\n", - " if tot_threshold > 0:\n", - " tpx_data = apply_single_filter(tpx_data, tpx_data[\"tot\"] >= tot_threshold)\n", - "\n", - " return tpx_data\n", - "\n", - "\n", - "def post_clustering_filter(tpx_data):\n", - " \"\"\"\n", - " Collection of filters directly applied after clustering.\n", - " Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n", - "\n", - " Parameters\n", - " ----------\n", - " tpx_data: Dictionary with timepix data, all arrays behind each key must be of same length, now with key labels\n", - "\n", - " Returns\n", - " -------\n", - " tpx_data: like input tpx_data but with applied filters\n", - " \"\"\"\n", - " if tpx_data[\"labels\"] is not None:\n", - " tpx_data = apply_single_filter(tpx_data, tpx_data[\"labels\"] != 0)\n", - "\n", - " return tpx_data\n", - "\n", "\n", "def clustering(tpx_data, epsilon=2, tof_scale=1e7, min_samples=3, n_jobs=1):\n", " \"\"\"\n", @@ -203,7 +165,7 @@ " \"\"\"\n", " coords = np.column_stack((tpx_data[\"x\"], tpx_data[\"y\"], tpx_data[\"toa\"]*tof_scale))\n", " dist = DBSCAN(eps=epsilon, min_samples=min_samples, metric=\"euclidean\", n_jobs=n_jobs).fit(coords)\n", - " return dist.labels_ + 1\n", + " return dist.labels_\n", "\n", "def empty_centroid_data():\n", " return {\n", @@ -264,19 +226,24 @@ " print(\"Data validation failed with message: %s\" % error_msgs[data_validation])\n", " else:\n", " print(\"Data validation failed: unknown reason\")\n", - " return empty_centroid_data()\n", + " return np.array([]), empty_centroid_data()\n", "\n", " # clustering (identify clusters in 2d data (x,y,tof) that belong to a single hit,\n", " # each sample belonging to a cluster is labeled with an integer cluster id no)\n", - " _tpx_data = pre_clustering_filter(_tpx_data, tot_threshold=threshold_tot)\n", - " _tpx_data[\"labels\"] = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples)\n", - " _tpx_data = post_clustering_filter(_tpx_data)\n", + " if threshold_tot > 0:\n", + " _tpx_data = apply_single_filter(_tpx_data, _tpx_data[\"tot\"] >= threshold_tot) \n", + "\n", + " labels = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples)\n", + " _tpx_data[\"labels\"] = labels\n", + " \n", + " if labels is not None:\n", + " _tpx_data = apply_single_filter(_tpx_data, labels >= 0)\n", + " \n", " # compute centroid data (reduce cluster of samples to a single point with properties)\n", - " if _tpx_data[\"labels\"] is None or _tpx_data[\"labels\"].size == 0:\n", + " if labels is None or len(_tpx_data['x']) == 0:\n", " # handle case of no identified clusters, return empty dictionary with expected keys\n", - " return empty_centroid_data()\n", - " _centroids = get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)\n", - " return _centroids\n", + " return np.array([]), empty_centroid_data()\n", + " return labels, get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)\n", "\n", "\n", "def process_train(worker_id, index, train_id, data):\n", @@ -292,7 +259,7 @@ " if raw_timewalk_lut is not None:\n", " toa -= raw_timewalk_lut[np.int_(tot // 25) - 1] * 1e3\n", "\n", - " centroids = compute_centroids(x, y, toa, tot, **centroiding_kwargs)\n", + " labels, centroids = compute_centroids(x, y, toa, tot, **centroiding_kwargs)\n", "\n", " num_centroids = len(centroids['x'])\n", " fraction_centroids = np.sum(centroids[\"size\"])/events['data.size'] if events['data.size']>0 else np.nan\n", @@ -303,6 +270,7 @@ "\n", " for key in centroid_dt.names:\n", " out_data[index, :num_centroids][key] = centroids[key]\n", + " out_labels[index, :len(labels)] = labels\n", " out_stats[index][\"fraction_px_in_centroids\"] = fraction_centroids\n", " out_stats[index][\"N_centroids\"] = num_centroids\n", " out_stats[index][\"missing_centroids\"] = missing_centroids" @@ -357,6 +325,7 @@ " ('tot_max', np.uint16),\n", " ('size', np.int16)])\n", "\n", + "pixel_shape = in_dc[in_fast_data]['data.x'].entry_shape\n", "\n", "centroid_settings_template = {\n", " 'timewalk_correction.raw_applied': (np.bool, bool(raw_timewalk_lut_filepath)),\n", @@ -401,10 +370,12 @@ " control_sources=[out_device_id],\n", " instrument_channels=[f'{out_fast_data}/data'])\n", " seq_file.create_index(train_ids)\n", - " \n", + " \n", + " out_labels = psh.alloc(shape=(len(train_ids),) + pixel_shape, dtype=np.int32)\n", " out_data = psh.alloc(shape=(len(train_ids), max_num_centroids), dtype=centroid_dt)\n", " out_stats = psh.alloc(shape=(len(train_ids),), dtype=centroid_stats_dt)\n", " \n", + " out_labels[:] = -1\n", " out_data[:] = (np.nan, np.nan, np.nan, np.nan, np.nan, 0, -1)\n", " out_stats[:] = tuple([centroid_stats_template[key][1] for key in centroid_stats_template])\n", " \n", @@ -421,6 +392,8 @@ " for key, (type_, data) in centroid_settings_template.items():\n", " cur_slow_data.create_run_key(f'settings.{key}', data)\n", " \n", + " cur_fast_data.create_key('data.labels', data=out_labels,\n", + " chunks=(1,) + pixel_shape, **dataset_kwargs)\n", " cur_fast_data.create_key('data.centroids', out_data,\n", " chunks=tuple(chunks_centroids),\n", " **dataset_kwargs)\n",