
[Timepix] Store labels assigned to pixels

Merged: Philipp Schmidt requested to merge `feat/timepix-labels` into `master`
```diff
 # Data selection parameters.
-run = 420 # required
+run = 307 # required
-in_folder = '/gpfs/exfel/exp/SQS/202230/p900256/raw' # required
+in_folder = '/gpfs/exfel/exp/SQS/202430/p900421/raw' # required
-out_folder = '/gpfs/exfel/exp/SQS/202230/p900256/scratch/cal_test' # required
+out_folder = '/gpfs/exfel/exp/SQS/202430/p900421/scratch/cal_test' # required
 proposal = '' # Proposal, leave empty for auto detection based on in_folder

 # These parameters are required by xfel-calibrate but ignored in this notebook.
```

```python
    return tpx_data


def pre_clustering_filter(tpx_data, tot_threshold=0):
    """
    Collection of filters applied directly before clustering.

    Note: no copy of the dictionary is made; since dictionaries are
    mutable, the input arrays are changed in memory!

    Parameters
    ----------
    tpx_data: dictionary with Timepix data; all arrays behind each key
        must have the same length
    tot_threshold: minimum ToT required for a pixel to contain valid data

    Returns
    -------
    tpx_data: like the input tpx_data, but with the filters applied
    """
    if tot_threshold > 0:
        tpx_data = apply_single_filter(tpx_data, tpx_data["tot"] >= tot_threshold)
    return tpx_data


def post_clustering_filter(tpx_data):
    """
    Collection of filters applied directly after clustering.

    Note: no copy of the dictionary is made; since dictionaries are
    mutable, the input arrays are changed in memory!

    Parameters
    ----------
    tpx_data: dictionary with Timepix data; all arrays behind each key
        must have the same length, now including the key "labels"

    Returns
    -------
    tpx_data: like the input tpx_data, but with the filters applied
    """
    if tpx_data["labels"] is not None:
        tpx_data = apply_single_filter(tpx_data, tpx_data["labels"] != 0)
    return tpx_data
```
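Both filters delegate to `apply_single_filter`, which is not shown in this excerpt. A minimal sketch of what such a helper presumably does, assuming it simply indexes every array in the dictionary with one boolean mask (the name exists in the notebook; this body is a guess):

```python
import numpy as np

def apply_single_filter(tpx_data, mask):
    # Hypothetical sketch: index every array in the dict with the same
    # boolean mask. All values must be arrays of equal length, and the
    # dict is mutated in place, matching the docstrings' warning.
    for key in tpx_data:
        if tpx_data[key] is not None:
            tpx_data[key] = tpx_data[key][mask]
    return tpx_data

# Example: keep only pixels with ToT >= 1.
data = {"x": np.array([1, 2, 3]), "tot": np.array([10, 0, 25])}
apply_single_filter(data, data["tot"] >= 1)
print(data["x"])  # [1 3]
```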
```diff
 def clustering(tpx_data, epsilon=2, tof_scale=1e7, min_samples=3, n_jobs=1):
     """
     """
     coords = np.column_stack((tpx_data["x"], tpx_data["y"], tpx_data["toa"]*tof_scale))
     dist = DBSCAN(eps=epsilon, min_samples=min_samples, metric="euclidean", n_jobs=n_jobs).fit(coords)
-    return dist.labels_ + 1
+    return dist.labels_
```
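The switch from `dist.labels_ + 1` to `dist.labels_` keeps scikit-learn's native labelling, in which clusters are numbered from 0 and noise samples get -1; downstream code then filters on `labels >= 0` instead of `labels != 0`. A small standalone demonstration of that convention:

```python
import numpy as np
from sklearn.cluster import DBSCAN

# Three nearby points form one cluster; the far point becomes noise.
coords = np.array([[0.0, 0.0], [0.1, 0.0], [0.0, 0.1], [10.0, 10.0]])
labels = DBSCAN(eps=1.0, min_samples=3).fit(coords).labels_
print(labels)       # [ 0  0  0 -1]
print(labels >= 0)  # [ True  True  True False] -> mask to drop noise
```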
```python
def empty_centroid_data():
    return {
```

```diff
         print("Data validation failed with message: %s" % error_msgs[data_validation])
     else:
         print("Data validation failed: unknown reason")
-    return empty_centroid_data()
+    return np.array([]), empty_centroid_data()
```
```diff
     # clustering (identify clusters in 2d data (x,y,tof) that belong to a single hit,
     # each sample belonging to a cluster is labeled with an integer cluster id no)
-    _tpx_data = pre_clustering_filter(_tpx_data, tot_threshold=threshold_tot)
-    _tpx_data["labels"] = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples)
-    _tpx_data = post_clustering_filter(_tpx_data)
+    if threshold_tot > 0:
+        _tpx_data = apply_single_filter(_tpx_data, _tpx_data["tot"] >= threshold_tot)
+
+    labels = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples)
+    _tpx_data["labels"] = labels
+
+    if labels is not None:
+        _tpx_data = apply_single_filter(_tpx_data, labels >= 0)
```
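Storing the labels in the dictionary before the noise filter runs means the mask `labels >= 0` removes noise pixels and their labels together, so per-pixel alignment between coordinates and labels survives the cut. A toy illustration of that invariant:

```python
import numpy as np

pixels = {
    "x": np.array([10, 11, 12, 200]),
    "y": np.array([20, 21, 22, 5]),
    "labels": np.array([0, 0, 0, -1]),  # -1 is DBSCAN noise
}
mask = pixels["labels"] >= 0
for key in pixels:
    pixels[key] = pixels[key][mask]   # same mask for every array
print(pixels["x"], pixels["labels"])  # [10 11 12] [0 0 0]
```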
```diff
     # compute centroid data (reduce cluster of samples to a single point with properties)
-    if _tpx_data["labels"] is None or _tpx_data["labels"].size == 0:
+    if labels is None or len(_tpx_data['x']) == 0:
         # handle case of no identified clusters, return empty dictionary with expected keys
-        return empty_centroid_data()
-    _centroids = get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)
-    return _centroids
+        return np.array([]), empty_centroid_data()
+    return labels, get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)
```
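`get_centroids` itself is outside this excerpt. As a rough sketch of the reduction the comment describes, collapsing each cluster of labelled samples into one point, a ToT-weighted mean per label might look like the following (the weighting is an assumption for illustration, not necessarily what `get_centroids` does):

```python
import numpy as np

def sketch_centroids(x, y, tot, labels):
    # Reduce each cluster (label >= 0) to a single ToT-weighted point;
    # noise samples (label -1) are ignored entirely.
    out = {"x": [], "y": [], "tot": [], "size": []}
    for lbl in np.unique(labels[labels >= 0]):
        sel = labels == lbl
        w = tot[sel].astype(float)
        out["x"].append(np.average(x[sel], weights=w))
        out["y"].append(np.average(y[sel], weights=w))
        out["tot"].append(w.sum())
        out["size"].append(int(sel.sum()))
    return {k: np.asarray(v) for k, v in out.items()}

labels = np.array([0, 0, -1, 1, 1])
c = sketch_centroids(np.array([1.0, 2.0, 9.0, 5.0, 6.0]),
                     np.array([1.0, 1.0, 9.0, 5.0, 5.0]),
                     np.array([4.0, 4.0, 1.0, 2.0, 6.0]),
                     labels)
print(c["x"], c["size"])  # one centroid per cluster, noise ignored
```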
```diff
 def process_train(worker_id, index, train_id, data):
     if raw_timewalk_lut is not None:
         toa -= raw_timewalk_lut[np.int_(tot // 25) - 1] * 1e3

-    centroids = compute_centroids(x, y, toa, tot, **centroiding_kwargs)
+    labels, centroids = compute_centroids(x, y, toa, tot, **centroiding_kwargs)

     num_centroids = len(centroids['x'])
     fraction_centroids = np.sum(centroids["size"]) / events['data.size'] if events['data.size'] > 0 else np.nan

     for key in centroid_dt.names:
         out_data[index, :num_centroids][key] = centroids[key]

+    out_labels[index, :len(labels)] = labels
     out_stats[index]["fraction_px_in_centroids"] = fraction_centroids
     out_stats[index]["N_centroids"] = num_centroids
     out_stats[index]["missing_centroids"] = missing_centroids
```
```diff
                          ('tot_max', np.uint16),
                          ('size', np.int16)])

+pixel_shape = in_dc[in_fast_data]['data.x'].entry_shape
+
 centroid_settings_template = {
     'timewalk_correction.raw_applied': (np.bool, bool(raw_timewalk_lut_filepath)),

     control_sources=[out_device_id],
     instrument_channels=[f'{out_fast_data}/data'])
 seq_file.create_index(train_ids)
```
 
```diff
+out_labels = psh.alloc(shape=(len(train_ids),) + pixel_shape, dtype=np.int32)
 out_data = psh.alloc(shape=(len(train_ids), max_num_centroids), dtype=centroid_dt)
 out_stats = psh.alloc(shape=(len(train_ids),), dtype=centroid_stats_dt)

+out_labels[:] = -1
 out_data[:] = (np.nan, np.nan, np.nan, np.nan, np.nan, 0, -1)
 out_stats[:] = tuple([centroid_stats_template[key][1] for key in centroid_stats_template])
```
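`out_data` is a structured array, so assigning a single tuple broadcasts one default record (NaN for the float fields, 0 and -1 for the integer ones) into every slot, and `out_labels[:] = -1` gives every pixel slot the noise/padding value up front. A compact demonstration of the tuple broadcast with a shortened, hypothetical dtype:

```python
import numpy as np

centroid_dt = np.dtype([('x', np.float64), ('y', np.float64),
                        ('size', np.int16)])
out = np.empty((2, 3), dtype=centroid_dt)
out[:] = (np.nan, np.nan, -1)  # one record broadcast to every element
print(out['size'])             # [[-1 -1 -1]
                               #  [-1 -1 -1]]
```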
```diff
 for key, (type_, data) in centroid_settings_template.items():
     cur_slow_data.create_run_key(f'settings.{key}', data)

+cur_fast_data.create_key('data.labels', data=out_labels,
+                         chunks=(1,) + pixel_shape, **dataset_kwargs)
 cur_fast_data.create_key('data.centroids', out_data,
                          chunks=tuple(chunks_centroids),
                          **dataset_kwargs)
```
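Once written, `data.labels` sits next to `data.centroids` in the output sequence files and can be read back with EXtra-data. A sketch, assuming a typical Timepix source name (the actual name depends on the instrument configuration):

```python
from extra_data import RunDirectory

run = RunDirectory(out_folder)  # folder the corrected files were written to
src = 'SQS_AQS_DET/DET/TIMEPIX:daqOutput'  # hypothetical source name

labels = run[src, 'data.labels'].ndarray()       # (trains, pixels), int32
centroids = run[src, 'data.centroids'].ndarray()
# label n >= 0 maps a raw pixel to centroid n; -1 marks noise or padding
```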