Commit 7b9a301d authored by Philipp Schmidt

Add original Timepix centroiding notebook for reference

parent 130a3247
Merge request !645: [TIMEPIX] Add notebook to compute centroids of pixel events
%% Cell type:markdown id:4d04f210-fc4a-4b1d-9dfe-9c85b94b884f tags:
## Imports
%% Cell type:code id:524fe654-e112-4abe-813c-a0be9b3a3034 tags:
``` python
from time import time
import numpy as np
from extra_data import open_run, RunDirectory
```
%% Cell type:code id:392aa413-0035-4861-9f1f-03abb51d0613 tags:
``` python
%matplotlib notebook
```
%% Cell type:markdown id:3911b4e1-11af-4a51-be59-103ca9f6f261 tags:
## Centroiding functions
%% Cell type:code id:e36f997c-4b66-4b11-99a8-5887e3572f56 tags:
``` python
# centroiding
import numpy as np
import scipy.ndimage as nd
from sklearn.cluster import DBSCAN
from warnings import warn
import traceback as tb
error_msgs = {
-1: "tpx_data has an invalid structure - ignore provided data",
-2: "tpx_data arrays are of invalid lengths - ignore provided data",
-3: "tpx_data arrays are empty"
}
def check_data(tpx_data):
required_keys = ["x", "y", "tof", "tot"]
for key in required_keys:
if key not in tpx_data.keys():
warn("tpx data must contain the keys %s, but key %s not in tpx data keys (%s)" % (required_keys, key, list(tpx_data.keys())),
category=UserWarning)
return -1
reference_n_samples_key = "x"
n_samples = len(tpx_data[reference_n_samples_key])
for key in tpx_data.keys():
if n_samples != len(tpx_data[key]):
warn("arrays in tpx data must be of same length ( len(tpx_data[%s])=%i!=%i=(len(tpx_data[%s]) )" % (reference_n_samples_key, n_samples, len(tpx_data[key]), key),
category=UserWarning)
return -2
if n_samples == 0:
warn("no samples were provides with tpx data", category=UserWarning)
return -3
return 0
def apply_single_filter(tpx_data, _filter):
"""
Simple function to apply a selecting or sorting filter to a dictionary of equally sized arrays
    Note: no copy of the dictionary is made; as dictionaries are mutable, the input data is modified in place!
Parameters
----------
tpx_data: dictionary with timepix data, all arrays behind each key must be of same length
_filter: 1d array or list of integers or booleans or np.s_ to select or sort data like a = a[_filter]
Returns
-------
tpx_data: like input tpx_data but with applied filter
"""
try:
for key in tpx_data.keys():
tpx_data[key] = np.array(tpx_data[key])[_filter]
except Exception as e:
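        # debug output: dump the filter and array shapes before re-raising, to help diagnose invalid filters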
# print(tpx_data)
print(_filter)
print(_filter.dtype)
print(_filter.shape)
print(tpx_data[key].shape)
raise e
return tpx_data
def pre_clustering_filter(tpx_data, tot_threshold=None):
"""
Collection of filters directly applied before clustering.
    Note: no copy of the dictionary is made; as dictionaries are mutable, the input data is modified in place!
Parameters
----------
tpx_data: dictionary with timepix data, all arrays behind each key must be of same length
    tot_threshold: minimum time-over-threshold (ToT) value; samples below this threshold are discarded before clustering
Returns
-------
tpx_data: like input tpx_data but with applied filters
"""
if tot_threshold is not None:
tpx_data = apply_single_filter(tpx_data, tpx_data["tot"] >= tot_threshold)
return tpx_data
def post_clustering_filter(tpx_data):
"""
Collection of filters directly applied after clustering.
    Note: no copy of the dictionary is made; as dictionaries are mutable, the input data is modified in place!
Parameters
----------
    tpx_data: dictionary with timepix data, all arrays behind each key must be of same length, now including the key "labels"
Returns
-------
tpx_data: like input tpx_data but with applied filters
"""
if tpx_data["labels"] is not None:
tpx_data = apply_single_filter(tpx_data, tpx_data["labels"] != 0)
return tpx_data
def clustering(tpx_data, epsilon=2, tof_scale=1e7, min_samples=3, n_jobs=1):
"""
Parameters
----------
    tpx_data: dictionary with timepix data; requires the keys "x", "y" and "tof"
    epsilon: maximum distance between two samples for DBSCAN to consider them neighbours
    tof_scale: scaling factor applied to the "tof" axis so that it becomes comparable to the pixel axes
    min_samples: minimum number of samples in a neighbourhood for a point to be considered a DBSCAN core point
    n_jobs: number of parallel jobs used by DBSCAN
Returns
-------
"""
coords = np.column_stack((tpx_data["x"], tpx_data["y"], tpx_data["tof"]*tof_scale))
dist = DBSCAN(eps=epsilon, min_samples=min_samples, metric="euclidean", n_jobs=n_jobs).fit(coords)
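    # DBSCAN labels noise samples with -1; shifting by +1 turns noise into label 0,
    # which post_clustering_filter() removes, while real clusters start at 1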
return dist.labels_ + 1
def empty_centroid_data():
return {
"x": np.array([]),
"y": np.array([]),
"tof": np.array([]),
"tot_avg": np.array([]),
"tot_max": np.array([]),
"size": np.array([]),
}
def get_centroids(tpx_data, timewalk_lut=None):
centroid_data = empty_centroid_data()
cluster_labels, cluster_size = np.unique(tpx_data["labels"], return_counts=True)
cluster_tot_peaks = np.array(nd.maximum_position(tpx_data["tot"], labels=tpx_data["labels"], index=cluster_labels)).ravel()
cluster_tot_integrals = nd.sum(tpx_data["tot"], labels=tpx_data["labels"], index=cluster_labels)
# compute centroid center through weighted average
centroid_data["x"] = np.array(nd.sum(tpx_data["x"] * tpx_data["tot"], labels=tpx_data["labels"], index=cluster_labels) / cluster_tot_integrals).ravel()
centroid_data["y"] = np.array(nd.sum(tpx_data["y"] * tpx_data["tot"], labels=tpx_data["labels"], index=cluster_labels) / cluster_tot_integrals).ravel()
centroid_data["tof"] = np.array(nd.sum(tpx_data["tof"] * tpx_data["tot"], labels=tpx_data["labels"], index=cluster_labels) / cluster_tot_integrals).ravel()
# intensity & size information
centroid_data["tot_avg"] = np.array(nd.mean(tpx_data["tot"], labels=tpx_data["labels"], index=cluster_labels))
centroid_data["tot_max"] = tpx_data["tot"][cluster_tot_peaks]
centroid_data["tot"] = np.array(cluster_tot_integrals)
centroid_data["size"] = cluster_size
# train ID information
# ~ centroid_data["tid"] = tpx_data["tid"][cluster_tot_peaks]
# correct for timewalk if provided
if timewalk_lut is not None:
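        # look up the correction by ToT in bins of 25 (first bin -> index 0) and scale
        # by 1e3, presumably to convert the LUT units into the ToF units used here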
centroid_data["tof"] -= timewalk_lut[np.int_(centroid_data["tot_max"] // 25) - 1] * 1e3
return centroid_data
def compute_centroids(x, y, tof, tot,
threshold_tot=None,
clustering_epsilon=2,
clustering_tof_scale=1e7,
clustering_min_samples=3,
clustering_n_jobs=1,
centroiding_timewalk_lut=None):
# format input data
_tpx_data = {
"x": x,
"y": y,
"tof": tof,
"tot": tot
}
# ensure that valid data is available
data_validation = check_data(_tpx_data)
if data_validation < 0:
if data_validation in error_msgs.keys():
print("Data validation failed with message: %s" % error_msgs[data_validation])
else:
print("Data validation failed: unknown reason")
return None
    # clustering (identify clusters in the 3d data (x, y, tof) that belong to a single hit;
    # each sample belonging to a cluster is labeled with an integer cluster id)
_tpx_data = pre_clustering_filter(_tpx_data, tot_threshold=threshold_tot)
_tpx_data["labels"] = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples, n_jobs=clustering_n_jobs)
_tpx_data = post_clustering_filter(_tpx_data)
# compute centroid data (reduce cluster of samples to a single point with properties)
if _tpx_data["labels"] is None or _tpx_data["labels"].size == 0:
# handle case of no identified clusters, return empty dictionary with expected keys
return empty_centroid_data()
_centroids = get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)
return _centroids
```
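%% Cell type:markdown tags:
A short sanity check, added here for illustration (not part of the original analysis): `compute_centroids` applied to synthetic data with two artificial clusters and one isolated sample. With the default parameters (`clustering_epsilon=2`, `clustering_min_samples=3`), the isolated sample is rejected as noise and two centroids remain.
%% Cell type:code tags:
``` python
# Synthetic sanity check (illustration only): two tight clusters and one stray sample.
rng = np.random.default_rng(seed=0)
x = np.concatenate([10 + rng.normal(0, 0.3, 20), 40 + rng.normal(0, 0.3, 20), [100.0]])
y = np.concatenate([10 + rng.normal(0, 0.3, 20), 40 + rng.normal(0, 0.3, 20), [100.0]])
tof = np.concatenate([np.full(20, 1e-7), np.full(20, 2e-7), [5e-7]])  # scaled by tof_scale=1e7 inside clustering()
tot = np.full(41, 100.0)
demo_centroids = compute_centroids(x, y, tof, tot)
print(len(demo_centroids["x"]), "centroids found")  # expect 2; the stray sample is dropped as noise
```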
%% Cell type:markdown id:4f54e558-d862-48af-86b9-7d9e3d53fd3d tags:
## Output function (for illustration) - instead, data should be written to an output file train-ID-wise
%% Cell type:code id:83acf826-e351-4a39-a870-6c1e0298e711 tags:
``` python
def output(train_id, centroids, fixed_len=None):
"""
    In this version the resulting vectors within a run vary in length;
    if fixed_len is provided, fixed-length vectors are generated.
"""
if fixed_len is None:
out = {
"%s[data.trainId]"%event_output_key : train_id,
"%s[data.x]"%event_output_key : centroids["x"],
"%s[data.y]"%event_output_key : centroids["y"],
"%s[data.toa]"%event_output_key : centroids["tof"],
"%s[data.tot_avg]"%event_output_key : centroids["tot_avg"],
"%s[data.tot_max]"%event_output_key : centroids["tot_max"],
"%s[data.size]"%event_output_key : centroids["size"],
}
else:
if len(centroids["x"])>fixed_len:
sel = np.s_[:fixed_len]
out = {
"%s[data.trainId]"%event_output_key : train_id,
"%s[data.x]"%event_output_key : centroids["x"][sel],
"%s[data.y]"%event_output_key : centroids["y"][sel],
"%s[data.toa]"%event_output_key : centroids["tof"][sel],
"%s[data.tot_avg]"%event_output_key : centroids["tot_avg"][sel],
"%s[data.tot_max]"%event_output_key : centroids["tot_max"][sel],
"%s[data.centroid_size]"%event_output_key : centroids["size"][sel],
"%s[data.size]"%event_output_key : len(centroids["x"]),
}
else:
pad = fixed_len - len(centroids["x"])
out = {
"%s[data.trainId]"%event_output_key : train_id, # int 0d
"%s[data.x]"%event_output_key : np.pad(centroids["x"], (0,pad), constant_values=1), # float 1d
"%s[data.y]"%event_output_key : np.pad(centroids["y"], (0,pad), constant_values=1), # float 1d
"%s[data.toa]"%event_output_key : np.pad(centroids["tof"], (0,pad), constant_values=1), # float 1d
"%s[data.tot_avg]"%event_output_key : np.pad(centroids["tot_avg"], (0,pad), constant_values=1), # float 1d
"%s[data.tot_max]"%event_output_key : np.pad(centroids["tot_max"], (0,pad), constant_values=1), # float 1d
"%s[data.centroid_size]"%event_output_key : np.pad(centroids["size"], (0,pad), constant_values=1), # int 1d
"%s[data.size]"%event_output_key : len(centroids["x"]), # int 0d
}
    # print(out)
    return out
```
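%% Cell type:markdown tags:
A quick illustration of the fixed-length branch, added for reference (not part of the original notebook). It assumes the same `event_output_key` that is set in the Centroiding section below and uses an empty centroid set, so all vectors consist of padding:
%% Cell type:code tags:
``` python
# Illustration only: exercise the padding branch of output() with zero centroids.
event_output_key = 'SQS_AQS_CAM/CAM/TIMEPIX3:daqEventOutput'  # same key as set below
demo_out = output(train_id=0, centroids=empty_centroid_data(), fixed_len=4)
print(demo_out["%s[data.size]" % event_output_key])  # 0 - no centroids in this train
print(demo_out["%s[data.x]" % event_output_key])     # four padding values (constant 1)
```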
%% Cell type:markdown id:a39ce0e4-31b3-40c7-9c80-72b89d037714 tags:
## Settings
%% Cell type:markdown id:8318bc3f-01be-4144-a4de-ebd736563396 tags:
### Data selection (for dev)
%% Cell type:code id:693c72cd-9c67-46d6-a0bc-438126e62642 tags:
``` python
# Data selection parameters.
run_id = 420
proposal = 900256
```
%% Cell type:markdown id:13956e56-d818-42e4-aa73-7f9c4a897732 tags:
### Parameters to be exposed with calibration pipeline
%% Cell type:code id:a9adf469-32de-43b4-91fa-2508a952d0dc tags:
``` python
# Centroiding and output parameters that should be easily adjustable
max_n_centroids = 10000
clustering_epsilon = 2
clustering_tof_scale = 1e7
clustering_min_samples = 3
clustering_n_jobs = 1
threshold_tot = None
raw_timewalk_lut_filepath = None  # path to look-up table for timewalk correction or None
centroiding_timewalk_lut_filepath = None  # path to look-up table for timewalk correction or None
# raw_timewalk_lut_filepath = "timewalk_raw.npy"  # path to look-up table for timewalk correction or None
# centroiding_timewalk_lut_filepath = "timewalk_cent.npy"  # path to look-up table for timewalk correction or None
```
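%% Cell type:markdown tags:
The timewalk look-up tables are loaded with `np.load` below and indexed as `lut[np.int_(tot // 25) - 1]`, so each is expected to be a 1d numpy array with one entry per 25-unit ToT bin. A minimal sketch (hypothetical bin count) for creating a placeholder table:
%% Cell type:code tags:
``` python
# Hypothetical sketch: write a placeholder (all-zero, i.e. no-op) timewalk LUT
# compatible with the indexing lut[np.int_(tot // 25) - 1] used in this notebook.
n_tot_bins = 400  # assumed number of 25-unit ToT bins; adjust to the detector's ToT range
np.save("timewalk_raw.npy", np.zeros(n_tot_bins))
```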
%% Cell type:markdown id:c6dff850-18c8-4d5a-9362-e581cce18d8e tags:
## Centroiding
%% Cell type:code id:56306c15-513e-4e7f-9c47-c52ca61b27a8 tags:
``` python
run = open_run(run=run_id, proposal=proposal)
event_output_key = 'SQS_AQS_CAM/CAM/TIMEPIX3:daqEventOutput'
if raw_timewalk_lut_filepath is not None:
raw_timewalk_lut = np.load(raw_timewalk_lut_filepath)
else:
raw_timewalk_lut = None
if centroiding_timewalk_lut_filepath is not None:
centroiding_timewalk_lut = np.load(centroiding_timewalk_lut_filepath)
else:
centroiding_timewalk_lut = None
```
%% Cell type:code id:8c28a4c8-961c-496b-80da-7fd867e5b0d3 tags:
``` python
t00 = time()
for train_id, data in run.select(event_output_key, "data.*").trains():
t0 = time()
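    # the raw event arrays have fixed length; data.size gives the number of valid samples in this train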
sel = np.s_[:data[event_output_key]["data.size"]]
tot = np.array(data[event_output_key]["data.tot"][sel])
toa = np.array(data[event_output_key]["data.toa"][sel])
if raw_timewalk_lut is not None:
toa -= raw_timewalk_lut[np.int_(tot // 25) - 1] * 1e3
centroids = compute_centroids(data[event_output_key]["data.x"][sel],
data[event_output_key]["data.y"][sel],
toa,
tot,
threshold_tot=threshold_tot,
clustering_epsilon=clustering_epsilon,
clustering_tof_scale=clustering_tof_scale,
clustering_min_samples=clustering_min_samples,
clustering_n_jobs=clustering_n_jobs,
centroiding_timewalk_lut=centroiding_timewalk_lut)
# print(time()-t0)
output(train_id, centroids, fixed_len=max_n_centroids)
# if time()-t00 > 0:
# break
print(time()-t00)
```