Skip to content
Snippets Groups Projects

Fix/larger data

Merged Steffen Hauf requested to merge fix/larger_data into master
1 unresolved thread
1 file
+ 1
36
Compare changes
  • Side-by-side
  • Inline
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/201831/p900039/raw" # the folder to read data from, required
run = 273 # runs to process, required
out_folder = "/gpfs/exfel/exp/SPB/201831/p900039/proc/" # the folder to output to, required
calfile = "/gpfs/exfel/exp/SPB/201831/p900039/usr/calibration_constants230818.h5" # path to calibration file. Leave empty if all data should come from DB
sequences = [0] # sequences to correct, set to -1 for all, range allowed
mem_cells = 176 # memory cells in data
interlaced = False # whether data is in interlaced layout
overwrite = True # set to True if existing data should be overwritten
no_relative_gain = False # do not do relative gain correction
cluster_profile = "noDB"
max_pulses = 500
local_input = False
bias_voltage = 300
cal_db_interface = "tcp://max-exfl015:5005" # the database interface to use
use_dir_creation_date = False # use the creation data of the input dir for database queries
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
photon_energy = 9.2 # photon energy in keV
index_v = 2 # version of RAW index type
nodb = False # if set only file-based constants will be used
def balance_sequences(in_folder, run, sequences, sequences_per_node):
import glob
import re
import numpy as np
if sequences_per_node != 0:
sequence_files = glob.glob("{}/r{:04d}/*-S*.h5".format(in_folder, run))
seq_nums = set()
for sf in sequence_files:
seqnum = re.findall(r".*-S([0-9]*).h5", sf)[0]
seq_nums.add(int(seqnum))
seq_nums -= set(sequences)
return [l.tolist() for l in np.array_split(list(seq_nums),
len(seq_nums)//sequences_per_node+1)]
else:
return sequences
```
%% Cell type:code id: tags:
``` python
import sys
from collections import OrderedDict
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
import os
import h5py
import numpy as np
import matplotlib
matplotlib.use("agg")
import matplotlib.pyplot as plt
from ipyparallel import Client
print("Connecting to profile {}".format(cluster_profile))
view = Client(profile=cluster_profile)[:]
view.use_dill()
from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
from cal_tools.cal_tools import (gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name,
get_dir_creation_date, get_constant_from_db)
il_mode = interlaced
max_cells = mem_cells//2 if il_mode else mem_cells
gains = np.arange(3)
cells = np.arange(max_cells)
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print("Using {} as creation time".format(creation_time))
in_folder = "{}/r{:04d}".format(in_folder, run)
print("Working in IL Mode: {}. Actual cells in use are: {}".format(il_mode, max_cells))
if sequences[0] == -1:
sequences = None
do_rel_gain = not no_relative_gain
QUADRANTS = 4
MODULES_PER_QUAD = 4
DET_FILE_INSET = "AGIPD"
CHUNK_SIZE = 512
MAX_PAR = 32
if in_folder[-1] == "/":
in_folder = in_folder[:-1]
out_folder = "{}/{}".format(out_folder, os.path.split(in_folder)[-1])
print("Outputting to {}".format(out_folder))
if not os.path.exists(out_folder):
os.makedirs(out_folder)
elif not overwrite:
raise AttributeError("Output path exists! Exiting")
```
%% Output
/gpfs/exfel/data/scratch/haufs/clean_cal/karabo/extern/lib/python3.4/importlib/_bootstrap.py:321: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
return f(*args, **kwds)
Connecting to profile noDB
Working in IL Mode: False. Actual cells in use are: 176
Outputting to /gpfs/exfel/exp/SPB/201831/p900039/proc//r0273
%% Cell type:code id: tags:
``` python
def combine_stack(d, sdim):
combined = np.zeros((sdim, 2048,2048))
combined[...] = np.nan
dy = 0
for i in range(16):
if i < 8:
dx = -512
if i > 3:
dx -= 25
mx = 1
my = i % 8
combined[:, my*128+dy:(my+1)*128+dy,
mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,:,::-1]
dy += 30
if i == 3:
dy += 30
elif i < 12:
dx = 80 - 50
if i == 8:
dy = 4*30 + 30 +50 -30
mx = 1
my = i % 8 +4
combined[:, my*128+dy:(my+1)*128+dy,
mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
dy += 30
else:
dx = 100 - 50
if i == 11:
dy = 20
mx = 1
my = i - 14
combined[:, my*128+dy:(my+1)*128+dy,
mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]
dy += 30
return combined
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
from queue import Queue
from collections import OrderedDict
#if not os.path.exists(out_folder):
# os.makedirs(out_folder)
#elif not overwrite:
# raise AttributeError("Output path exists! Exiting")
def map_modules_from_files(filelist):
module_files = OrderedDict()
mod_ids = OrderedDict()
total_sequences = 0
sequences_qm = {}
for quadrant in range(0, QUADRANTS):
for module in range(0, MODULES_PER_QUAD):
name = "Q{}M{}".format(quadrant + 1, module + 1)
module_files[name] = Queue()
num = quadrant * 4 + module
mod_ids[name] = num
file_infix = "{}{:02d}".format(DET_FILE_INSET, num)
sequences_qm[name] = 0
for file in filelist:
if file_infix in file:
module_files[name].put(file)
total_sequences += 1
sequences_qm[name] += 1
return module_files, mod_ids, total_sequences, sequences_qm
dirlist = sorted(os.listdir(in_folder))
file_list = []
for entry in dirlist:
#only h5 file
abs_entry = "{}/{}".format(in_folder, entry)
if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":
if sequences is None:
file_list.append(abs_entry)
else:
for seq in sequences:
if "{:05d}.h5".format(seq) in abs_entry:
file_list.append(os.path.abspath(abs_entry))
mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)
MAX_PAR = min(MAX_PAR, total_sequences)
```
%% Cell type:markdown id: tags:
## Processed Files ##
%% Cell type:code id: tags:
``` python
import copy
from IPython.display import HTML, display, Markdown, Latex
import tabulate
print("Processing a total of {} sequence files in chunks of {}".format(total_sequences, MAX_PAR))
table = []
mfc = copy.copy(mapped_files)
ti = 0
for k, files in mfc.items():
i = 0
while not files.empty():
f = files.get()
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
# restore the queue
mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)
```
%% Output
Processing a total of 16 sequence files in chunks of 16
\begin{tabular}{rlrl}
\hline
\# & module & \# module & file \\
\hline
0 & Q1M1 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD00-S00000.h5 \\
1 & Q1M2 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD01-S00000.h5 \\
2 & Q1M3 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD02-S00000.h5 \\
3 & Q1M4 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD03-S00000.h5 \\
4 & Q2M1 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD04-S00000.h5 \\
5 & Q2M2 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD05-S00000.h5 \\
6 & Q2M3 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD06-S00000.h5 \\
7 & Q2M4 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD07-S00000.h5 \\
8 & Q3M1 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD08-S00000.h5 \\
9 & Q3M2 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD09-S00000.h5 \\
10 & Q3M3 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD10-S00000.h5 \\
11 & Q3M4 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD11-S00000.h5 \\
12 & Q4M1 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD12-S00000.h5 \\
13 & Q4M2 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD13-S00000.h5 \\
14 & Q4M3 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD14-S00000.h5 \\
15 & Q4M4 & 0 & /gpfs/exfel/exp/SPB/201831/p900039/raw/r0273/RAW-R0273-AGIPD15-S00000.h5 \\
\hline
\end{tabular}
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def correct_module(max_cells, do_rel_gain, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, inp):
import numpy as np
import copy
import h5py
import socket
from datetime import datetime
import re
import os
from influxdb import InfluxDBClient
from cal_tools.enums import BadPixels
from cal_tools.agipdlib import AgipdCorrections
client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')
def create_influx_entry(run, proposal, qm, sequence, filesize, chunksize,
total_sequences, success, runtime, reason=""):
return {
"measurement": "run_correction",
"tags": {
"host": socket.gethostname(),
"run": run,
"proposal": proposal,
"mem_cells": max_cells,
"sequence": sequence,
"module": qm,
"filesize": filesize,
"chunksize": chunksize,
"total_sequences": total_sequences,
"sequences_module": sequences_qm[qm],
"detector": "AGIPD",
"instrument": "SPB",
},
"time": datetime.utcnow().isoformat(),
"fields": {
"success": success,
"reason": reason,
"runtime": runtime,
}
}
hists_signal_low = None
hists_signal_high = None
hists_gain_vs_signal = None
hists_dig_gain_vs_signal = None
low_edges = None
high_edges = None
signal_edges = None
dig_signal_edges = None
if True: #try:
start = datetime.now()
success = True
reason = ""
filename, filename_out, channel, qm = inp
infile = h5py.File(filename, "r", driver="core")
outfile = h5py.File(filename_out, "w")
agipd_corr = AgipdCorrections(infile, outfile, max_cells, channel, max_pulses,
bins_gain_vs_signal, bins_signal_low_range,
bins_signal_high_range, bins_dig_gain_vs_signal,
do_rel_gain=do_rel_gain)
try:
agipd_corr.get_valid_image_idx()
except IOError:
return
if not nodb:
agipd_corr.initialize_from_db(dbparms, qm, only_dark=(fileparms != ""))
if fileparms != "":
agipd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)
print("Initialized constants")
for irange in agipd_corr.get_iteration_range():
agipd_corr.correct_agipd(irange)
print("Iterated")
print("All interations finished")
hists, edges = agipd_corr.get_histograms()
hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal= hists
low_edges, high_edges, signal_edges, dig_signal_edges = edges
outfile.close()
infile.close()
print("Closed files")
#except Exception as e:
# print(e)
success = False
reason = "Error"
#finally:
run = re.findall(r'.*r([0-9]{4}).*', filename)[0]
proposal = re.findall(r'.*p([0-9]{6}).*', filename)[0]
sequence = re.findall(r'.*S([0-9]{5}).*', filename)[0]
filesize = os.path.getsize(filename)
duration = (datetime.now()-start).total_seconds()
influx = create_influx_entry(run, proposal, qm, sequence, filesize, CHUNK_SIZE, total_sequences, success, duration, reason)
client.write_points([influx])
return (hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal,
low_edges, high_edges, signal_edges, dig_signal_edges)
done = False
first_files = []
inp = []
left = total_sequences
bins_gain_vs_signal = (100, 100)
bins_signal_low_range = 100
bins_signal_high_range = 100
bins_dig_gain_vs_signal = (100, 4)
hists_signal_low = np.zeros((bins_signal_low_range, max_pulses), np.float64)
hists_signal_high = np.zeros((bins_signal_low_range, max_pulses), np.float64)
hists_gain_vs_signal = np.zeros((bins_gain_vs_signal), np.float64)
hists_dig_gain_vs_signal = np.zeros((bins_dig_gain_vs_signal), np.float64)
low_edges, high_edges, signal_edges, dig_signal_edges = None, None, None, None
dbparms = cal_db_interface, creation_time, max_cells, bias_voltage, photon_energy
fileparms = calfile
while not done:
dones = []
first = True
for i in range(16):
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm in mapped_files and not mapped_files[qm].empty():
fname_in = str(mapped_files[qm].get())
dones.append(mapped_files[qm].empty())
else:
print("Skipping {}".format(qm))
first_files.append((None, None))
continue
fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
if first:
first_files.append((fname_in, fout))
inp.append((fname_in, fout, i, qm))
first = False
if len(inp) >= min(MAX_PAR, left):
print("Running {} tasks parallel".format(len(inp)))
p = partial(correct_module, max_cells, do_rel_gain, index_v, CHUNK_SIZE, total_sequences,
sequences_qm, bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb)
r = view.map_sync(p, inp)
#r = list(map(p, inp))
inp = []
left -= MAX_PAR
for rr in r:
if rr is not None:
hl, hh, hg, hdg, low_edges, high_edges, signal_edges, dig_edges = rr
hl, hh, hg, hdg, low_edges, high_edges, signal_edges, dig_signal_edges = rr
if hl is not None: # any one being None will also make the others None
hists_signal_low += hl.astype(np.float64)
hists_signal_high += hh.astype(np.float64)
hists_gain_vs_signal += hg.astype(np.float64)
hists_dig_gain_vs_signal += hdg.astype(np.float64)
done = all(dones)
```
%% Output
Running 16 tasks parallel
%% Cell type:code id: tags:
``` python
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np
%matplotlib inline
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10,10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
```
%% Cell type:markdown id: tags:
## Signal vs. Analogue Gain ##
The following plot shows plots signal vs. gain for the first 128 images.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis):
from matplotlib.colors import LogNorm
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
do_2d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Value (ADU)")
```
%% Cell type:markdown id: tags:
## Signal vs. Digitized Gain ##
The following plot shows plots signal vs. digitized gain for the first 128 images.
%% Cell type:code id: tags:
``` python
do_2d_plot(hists_dig_gain_vs_signal, dig_signal_edges, "Signal (ADU)", "Gain Bit Value")
```
%% Cell type:markdown id: tags:
## Mean Intensity per Pulse ##
The following plots show the mean signal for each pulse in a detailed and expanded intensity region.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_3d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
corrected = []
raw = []
gains = []
mask = []
for i, ff in enumerate(first_files):
try:
rf, cf = ff
#print(cf, i)
if rf is None:
print(rf)
raise Exception("File not present")
infile = h5py.File(rf, "r")
raw.append(np.array(infile["/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(i)][:max_cells,0,...]))
infile.close()
infile = h5py.File(cf, "r")
#print("/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(i))
corrected.append(np.array(infile["/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/data".format(i)][:max_cells,...]))
gains.append(np.array(infile["/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/gain".format(i)][:max_cells,...]))
mask.append(np.array(infile["/INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/image/mask".format(i)][:max_cells,...]))
infile.close()
except Exception as e:
corrected.append(np.zeros((max_cells, 512, 128)))
gains.append(np.zeros((max_cells, 512, 128)))
mask.append(np.zeros((max_cells, 512, 128)))
```
%% Cell type:code id: tags:
``` python
domask = True
if domask:
for i, c in enumerate(corrected):
c[mask[i] != 0] = 0
combined = combine_stack(corrected, corrected[0].shape[0])
combined_raw = combine_stack(raw, raw[0].shape[0])
combined_g = combine_stack(gains, gains[0].shape[0])
combined_mask = combine_stack(mask, mask[0].shape[0])
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
The per pixel mean of the first 128 images of the RAW data
%% Cell type:code id: tags:
``` python
%matplotlib inline
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(combined_raw[:,:1300,400:1600],axis=0),
vmin=min(0.75*np.median(combined_raw[combined_raw > 0]), 4000),
vmax=max(1.5*np.median(combined_raw[combined_raw > 0]), 7000), cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Shot Preview ###
A single shot image from cell 12 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
dim = combined[70,:1300,400:1600]
im = ax.imshow(dim, vmin=-5,
vmax=max(10*np.median(dim[dim > 0]), 100), cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
The per pixel mean of the first 128 images of the CORRECTED data
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(combined[:,:1300,400:1600], axis=0), vmin=-5,
vmax=max(10*np.median(combined[combined > 0]), 100), cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview ###
The per pixel maximum of the first 128 images of the digitized GAIN data
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.max(combined_g[:,:1300,400:1600], axis=0), vmin=0,
vmax=3, cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
from cal_tools.enums import BadPixels
from IPython.display import HTML, display, Markdown, Latex
import tabulate
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"])))
```
%% Cell type:markdown id: tags:
### Single Shot Bad Pixels ###
A single shot bad pixel map from cell 4 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.log2(combined_mask[4,:1300,400:1600]), vmin=0,
vmax=32, cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
Loading