Skip to content
Snippets Groups Projects
Commit 4100fcf3 authored by Steffen Hauf's avatar Steffen Hauf
Browse files

Update AGIPD correction to use library

parent fe762d92
No related branches found
No related tags found
1 merge request!23Fix/larger data
This commit is part of merge request !23. Comments created here will be created in the context of that merge request.
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/201831/p900039/raw" # the folder to read data from, required
run = 273 # runs to process, required
out_folder = "/gpfs/exfel/exp/SPB/201831/p900039/proc/" # the folder to output to, required
calfile = "/gpfs/exfel/exp/SPB/201831/p900039/usr/calibration_constants230818.h5" # path to calibration file. Leave empty if all data should come from DB
sequences = [0] # sequences to correct, set to -1 for all, range allowed
mem_cells = 176 # memory cells in data
interlaced = False # whether data is in interlaced layout
overwrite = True # set to True if existing data should be overwritten
no_relative_gain = False # do not do relative gain correction
cluster_profile = "noDB"
max_pulses = 500
local_input = False
bias_voltage = 300
cal_db_interface = "tcp://max-exfl015:5005" # the database interface to use
use_dir_creation_date = False # use the creation data of the input dir for database queries
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
photon_energy = 9.2 # photon energy in keV
index_v = 2 # version of RAW index type
nodb = False # if set only file-based constants will be used
def balance_sequences(in_folder, run, sequences, sequences_per_node):
import glob
import re
import numpy as np
if sequences_per_node != 0:
sequence_files = glob.glob("{}/r{:04d}/*-S*.h5".format(in_folder, run))
seq_nums = set()
for sf in sequence_files:
seqnum = re.findall(r".*-S([0-9]*).h5", sf)[0]
seq_nums.add(int(seqnum))
seq_nums -= set(sequences)
return [l.tolist() for l in np.array_split(list(seq_nums),
len(seq_nums)//sequences_per_node+1)]
else:
return sequences
```
%% Cell type:code id: tags:
``` python
import sys
from collections import OrderedDict
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
import os
import h5py
import numpy as np
import matplotlib
matplotlib.use("agg")
import matplotlib.pyplot as plt
from ipyparallel import Client
print("Connecting to profile {}".format(cluster_profile))
view = Client(profile=cluster_profile)[:]
view.use_dill()
from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
from cal_tools.cal_tools import (gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name,
get_dir_creation_date, get_constant_from_db)
il_mode = interlaced
max_cells = mem_cells//2 if il_mode else mem_cells
gains = np.arange(3)
cells = np.arange(max_cells)
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print("Using {} as creation time".format(creation_time))
in_folder = "{}/r{:04d}".format(in_folder, run)
print("Working in IL Mode: {}. Actual cells in use are: {}".format(il_mode, max_cells))
if sequences[0] == -1:
sequences = None
do_rel_gain = not no_relative_gain
QUADRANTS = 4
MODULES_PER_QUAD = 4
DET_FILE_INSET = "AGIPD"
CHUNK_SIZE = 512
MAX_PAR = 32
if in_folder[-1] == "/":
in_folder = in_folder[:-1]
out_folder = "{}/{}".format(out_folder, os.path.split(in_folder)[-1])
print("Outputting to {}".format(out_folder))
if not os.path.exists(out_folder):
os.makedirs(out_folder)
elif not overwrite:
raise AttributeError("Output path exists! Exiting")
```
%% Output
/gpfs/exfel/data/scratch/haufs/clean_cal/karabo/extern/lib/python3.4/importlib/_bootstrap.py:321: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
return f(*args, **kwds)
Connecting to profile noDB
Working in IL Mode: False. Actual cells in use are: 176
Outputting to /gpfs/exfel/exp/SPB/201831/p900039/proc//r0273
%% Cell type:code id: tags:
``` python
def combine_stack(d, sdim):
combined = np.zeros((sdim, 2048,2048))
combined[...] = np.nan
dy = 0
for i in range(16):
if i < 8:
dx = -512
if i > 3:
dx -= 25
mx = 1
my = i % 8
combined[:, my*128+dy:(my+1)*128+dy,
mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,:,::-1]
dy += 30
if i == 3:
dy += 30
elif i < 12:
dx = 80 - 50
if i == 8:
dy = 4*30 + 30 +50 -30
mx = 1
my = i % 8 +4
combined[:, my*128+dy:(my+1)*128+dy,
mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
dy += 30
else:
dx = 100 - 50
if i == 11:
dy = 20
mx = 1
my = i - 14
combined[:, my*128+dy:(my+1)*128+dy,
mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
dy += 30
return combined
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
from queue import Queue
from collections import OrderedDict
#if not os.path.exists(out_folder):
# os.makedirs(out_folder)
#elif not overwrite:
# raise AttributeError("Output path exists! Exiting")
def map_modules_from_files(filelist):
module_files = OrderedDict()
mod_ids = OrderedDict()
total_sequences = 0
sequences_qm = {}
for quadrant in range(0, QUADRANTS):
for module in range(0, MODULES_PER_QUAD):
name = "Q{}M{}".format(quadrant + 1, module + 1)
module_files[name] = Queue()
num = quadrant * 4 + module
mod_ids[name] = num
file_infix = "{}{:02d}".format(DET_FILE_INSET, num)
sequences_qm[name] = 0
for file in filelist:
if file_infix in file:
module_files[name].put(file)
total_sequences += 1
sequences_qm[name] += 1
return module_files, mod_ids, total_sequences, sequences_qm
dirlist = sorted(os.listdir(in_folder))
file_list = []
for entry in dirlist:
#only h5 file
abs_entry = "{}/{}".format(in_folder, entry)
if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":
if sequences is None:
file_list.append(abs_entry)
else:
for seq in sequences:
if "{:05d}.h5".format(seq) in abs_entry:
file_list.append(os.path.abspath(abs_entry))
mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)
MAX_PAR = min(MAX_PAR, total_sequences)
```
%% Cell type:markdown id: tags:
## Processed Files ##
%% Cell type:code id: tags:
``` python
import copy
from IPython.display import HTML, display, Markdown, Latex
import tabulate
print("Processing a total of {} sequence files in chunks of {}".format(total_sequences, MAX_PAR))
table = []
mfc = copy.copy(mapped_files)
ti = 0
for k, files in mfc.items():
i = 0
while not files.empty():
f = files.get()
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
# restore the queue
mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)
```
%% Output
Processing a total of 16 sequence files in chunks of 16
\begin{tabular}{rlrl}
\hline
\# & module & \# module & file \\
\hline
0 & Q1M1 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD00-S00000.h5 \\
1 & Q1M2 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD01-S00000.h5 \\
2 & Q1M3 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD02-S00000.h5 \\
3 & Q1M4 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD03-S00000.h5 \\
4 & Q2M1 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD04-S00000.h5 \\
5 & Q2M2 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD05-S00000.h5 \\
6 & Q2M3 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD06-S00000.h5 \\
7 & Q2M4 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD07-S00000.h5 \\
8 & Q3M1 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD08-S00000.h5 \\
9 & Q3M2 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD09-S00000.h5 \\
10 & Q3M3 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD10-S00000.h5 \\
11 & Q3M4 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD11-S00000.h5 \\
12 & Q4M1 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD12-S00000.h5 \\
13 & Q4M2 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD13-S00000.h5 \\
14 & Q4M3 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD14-S00000.h5 \\
15 & Q4M4 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD15-S00000.h5 \\
\hline
\end{tabular}
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def correct_module(max_cells, do_rel_gain, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, inp):
import numpy as np
import copy
import h5py
import socket
from datetime import datetime
import re
import os
from influxdb import InfluxDBClient
from cal_tools.enums import BadPixels
from cal_tools.agipdlib import AgipdCorrections
client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')
def create_influx_entry(run, proposal, qm, sequence, filesize, chunksize,
total_sequences, success, runtime, reason=""):
return {
"measurement": "run_correction",
"tags": {
"host": socket.gethostname(),
"run": run,
"proposal": proposal,
"mem_cells": max_cells,
"sequence": sequence,
"module": qm,
"filesize": filesize,
"chunksize": chunksize,
"total_sequences": total_sequences,
"sequences_module": sequences_qm[qm],
"detector": "AGIPD",
"instrument": "SPB",
},
"time": datetime.utcnow().isoformat(),
"fields": {
"success": success,
"reason": reason,
"runtime": runtime,
}
}
hists_signal_low = None
hists_signal_high = None
hists_gain_vs_signal = None
hists_dig_gain_vs_signal = None
low_edges = None
high_edges = None
signal_edges = None
dig_signal_edges = None
if True: #try:
start = datetime.now()
success = True
reason = ""
filename, filename_out, channel, qm = inp
infile = h5py.File(filename, "r", driver="core")
outfile = h5py.File(filename_out, "w")
agipd_corr = AgipdCorrections(infile, outfile, max_cells, channel, max_pulses,
bins_gain_vs_signal, bins_signal_low_range,
bins_signal_high_range, bins_dig_gain_vs_signal,
do_rel_gain=do_rel_gain)
try:
agipd_corr.get_valid_image_idx()
except IOError:
return
if not nodb:
agipd_corr.initialize_from_db(dbparms, qm, only_dark=(fileparms != ""))
if fileparms != "":
agipd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)
print("Initialized constants")
for irange in agipd_corr.get_iteration_range():
agipd_corr.correct_agipd(irange)
print("Iterated")
print("All interations finished")
hists, edges = agipd_corr.get_histograms()
hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal= hists
low_edges, high_edges, signal_edges, dig_signal_edges = edges
outfile.close()
infile.close()
print("Closed files")
#except Exception as e:
# print(e)
success = False
reason = "Error"
#finally:
run = re.findall(r'.*r([0-9]{4}).*', filename)[0]
proposal = re.findall(r'.*p([0-9]{6}).*', filename)[0]
sequence = re.findall(r'.*S([0-9]{5}).*', filename)[0]
filesize = os.path.getsize(filename)
duration = (datetime.now()-start).total_seconds()
influx = create_influx_entry(run, proposal, qm, sequence, filesize, CHUNK_SIZE, total_sequences, success, duration, reason)
client.write_points([influx])
return (hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal,
low_edges, high_edges, signal_edges, dig_signal_edges)
done = False
first_files = []
inp = []
left = total_sequences
bins_gain_vs_signal = (100, 100)
bins_signal_low_range = 100
bins_signal_high_range = 100
bins_dig_gain_vs_signal = (100, 4)
hists_signal_low = np.zeros((bins_signal_low_range, max_pulses), np.float64)
hists_signal_high = np.zeros((bins_signal_low_range, max_pulses), np.float64)
hists_gain_vs_signal = np.zeros((bins_gain_vs_signal), np.float64)
hists_dig_gain_vs_signal = np.zeros((bins_dig_gain_vs_signal), np.float64)
low_edges, high_edges, signal_edges, dig_signal_edges = None, None, None, None
dbparms = cal_db_interface, creation_time, max_cells, bias_voltage, photon_energy
fileparms = calfile
while not done:
dones = []
first = True
for i in range(16):
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = str(mapped_files[qm].get())
dones.append(mapped_files[qm].empty())
else:
print("Skipping {}".format(qm))
first_files.append((None, None))
continue
fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
if first:
first_files.append((fname_in, fout))
inp.append((fname_in, fout, i, qm))
first = False
if len(inp) >= min(MAX_PAR, left):
print("Running {} tasks parallel".format(len(inp)))
p = partial(correct_module, max_cells, do_rel_gain, index_v, CHUNK_SIZE, total_sequences,
sequences_qm, bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb)
r = view.map_sync(p, inp)
#r = list(map(p, inp))
inp = []
left -= MAX_PAR
for rr in r:
if rr is not None:
hl, hh, hg, hdg, low_edges, high_edges, signal_edges, dig_edges = rr
hl, hh, hg, hdg, low_edges, high_edges, signal_edges, dig_signal_edges = rr
if hl is not None: # any one being None will also make the others None
hists_signal_low += hl.astype(np.float64)
hists_signal_high += hh.astype(np.float64)
hists_gain_vs_signal += hg.astype(np.float64)
hists_dig_gain_vs_signal += hdg.astype(np.float64)
done = all(dones)
```
%% Output
Running 16 tasks parallel
%% Cell type:code id: tags:
``` python
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np
%matplotlib inline
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10,10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
```
%% Cell type:markdown id: tags:
## Signal vs. Analogue Gain ##
The following plot shows plots signal vs. gain for the first 128 images.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis):
from matplotlib.colors import LogNorm
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
do_2d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Value (ADU)")
```
%% Cell type:markdown id: tags:
## Signal vs. Digitized Gain ##
The following plot shows plots signal vs. digitized gain for the first 128 images.
%% Cell type:code id: tags:
``` python
do_2d_plot(hists_dig_gain_vs_signal, dig_signal_edges, "Signal (ADU)", "Gain Bit Value")
```
%% Cell type:markdown id: tags:
## Mean Intensity per Pulse ##
The following plots show the mean signal for each pulse in a detailed and expanded intensity region.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_3d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
corrected = []
raw = []
gains = []
mask = []
for i, ff in enumerate(first_files):
try:
rf, cf = ff
#print(cf, i)
if rf is None:
print(rf)
raise Exception("File not present")
infile = h5py.File(rf, "r")
raw.append(np.array(infile["/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(i)][:max_cells,0,...]))
infile.close()
infile = h5py.File(cf, "r")
#print("/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(i))
corrected.append(np.array(infile["/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(i)][:max_cells,...]))
gains.append(np.array(infile["/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/gain".format(i)][:max_cells,...]))
mask.append(np.array(infile["/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/mask".format(i)][:max_cells,...]))
infile.close()
except Exception as e:
corrected.append(np.zeros((max_cells, 512, 128)))
gains.append(np.zeros((max_cells, 512, 128)))
mask.append(np.zeros((max_cells, 512, 128)))
```
%% Cell type:code id: tags:
``` python
domask = True
if domask:
for i, c in enumerate(corrected):
c[mask[i] != 0] = 0
combined = combine_stack(corrected, corrected[0].shape[0])
combined_raw = combine_stack(raw, raw[0].shape[0])
combined_g = combine_stack(gains, gains[0].shape[0])
combined_mask = combine_stack(mask, mask[0].shape[0])
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
The per pixel mean of the first 128 images of the RAW data
%% Cell type:code id: tags:
``` python
%matplotlib inline
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(combined_raw[:,:1300,400:1600],axis=0),
vmin=min(0.75*np.median(combined_raw[combined_raw > 0]), 4000),
vmax=max(1.5*np.median(combined_raw[combined_raw > 0]), 7000), cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Shot Preview ###
A single shot image from cell 12 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
dim = combined[70,:1300,400:1600]
im = ax.imshow(dim, vmin=-5,
vmax=max(10*np.median(dim[dim > 0]), 100), cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
The per pixel mean of the first 128 images of the CORRECTED data
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(combined[:,:1300,400:1600], axis=0), vmin=-5,
vmax=max(10*np.median(combined[combined > 0]), 100), cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview ###
The per pixel maximum of the first 128 images of the digitized GAIN data
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.max(combined_g[:,:1300,400:1600], axis=0), vmin=0,
vmax=3, cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
from cal_tools.enums import BadPixels
from IPython.display import HTML, display, Markdown, Latex
import tabulate
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
```
%% Cell type:markdown id: tags:
### Single Shot Bad Pixels ###
A single shot bad pixel map from cell 4 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.log2(combined_mask[4,:1300,400:1600]), vmin=0,
vmax=32, cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment