Commit 22b9c50b authored by Steffen Hauf

Update LPD_Correct_and_Verify.ipynb

parent 7a4e4607
Merge request !112: Only look LPD files in geo assembly
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/201931/p900088/raw/" # the folder to read data from, required
run = 115 # runs to process, required
out_folder = "/gpfs/exfel/exp/FXE/201931/p900088/proc/" # the folder to output to, required
calfile = "/gpfs/exfel/data/scratch/xcal/lpd_store_0519.h5" # path to constants extracted from the db into a file
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
mem_cells = 512 # memory cells in data
overwrite = True # set to True if existing data should be overwritten
no_relative_gain = False # do not do relative gain correction
no_flat_fields = False # do not do flat field correction
cluster_profile = "noDB33" # cluster profile to use
max_pulses = 512 # maximum number of pulses per train
use_now_as_creation_date = False # do not use the dir creation date, but now
no_non_linear_corrections = False # do not apply non-linear corrections
max_cells_db = 512 # maximum cells for data from the database
rawversion = 2 # raw format version
instrument = "FXE" # the instrument
capacitor = '5pF' # capacitor setting: 5pF or 50pF
photon_energy = 9.2 # the photon energy in keV
nodb = False # set to true if db input is to be avoided
bias_voltage = 250 # detector bias voltage
cal_db_interface = "tcp://max-exfl016:8015#8020" # the database interface to use
geometry_file = "/gpfs/exfel/d/cal/exchange/lpdMF_00.h5" # the geometry file to use, MAR 2018
beam_center_offset = [1.5, 1] # offset from the beam center, MAR 2018
sequences_per_node = 1 # sequence files to process per node
timeout_cal_db = 30000 # timeout for calibration db requests in milliseconds
def balance_sequences(in_folder, run, sequences, sequences_per_node):
    """Split the run's sequence numbers into groups of at most 16, one group per node."""
    import glob
    import re
    import numpy as np
    if sequences[0] == -1:
        sequence_files = glob.glob("{}/r{:04d}/*-S*.h5".format(in_folder, run))
        seq_nums = set()
        for sf in sequence_files:
            seqnum = re.findall(r".*-S([0-9]*)\.h5", sf)[0]
            seq_nums.add(int(seqnum))
        seq_nums -= set(sequences)
        nsplits = len(seq_nums)//sequences_per_node+1
        while nsplits > 16:
            sequences_per_node += 1
            nsplits = len(seq_nums)//sequences_per_node+1
            print("Changed to {} sequences per node to have a maximum of 16 concurrent jobs".format(sequences_per_node))
        return [l.tolist() for l in np.array_split(list(seq_nums), nsplits) if len(l)]
    else:
        return sequences
```
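%% Cell type:markdown id: tags:
The `balance_sequences` helper above splits the available sequence files into per-node batches so that at most 16 jobs run concurrently. A minimal usage sketch (illustrative only, not part of the correction flow):
%% Cell type:code id: tags:
``` python
# Illustrative sketch: with sequences = [-1] the helper scans the run folder and
# returns one list of sequence numbers per node; otherwise it returns the list unchanged.
node_groups = balance_sequences(in_folder, run, sequences, sequences_per_node)
print(node_groups)
```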
%% Cell type:code id: tags:
``` python
import sys
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
max_cells = mem_cells
if sequences[0] == -1:
    sequences = None
do_rel_gain = not no_relative_gain
do_ff = not no_flat_fields
index_v = rawversion
#do_ff = False
#relgain_store = "/gpfs/exfel/d/proc/FXE/201830/p900020/calibration/lpd_ci_store_{}_16_5pf.h5"
print("Applying FF corrections: {}".format(do_ff))
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
import os
import h5py
import numpy as np
import matplotlib
matplotlib.use("agg")
import matplotlib.pyplot as plt
from ipyparallel import Client
from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions
from cal_tools.tools import gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name, get_dir_creation_date, get_constant_from_db
from cal_tools.influx import InfluxLogger
from cal_tools.enums import BadPixels
from cal_tools.plotting import show_overview, plot_badpix_3d, create_constant_overview
from cal_tools.lpdlib import LpdCorrections
from collections import OrderedDict
from influxdb import InfluxDBClient
print("Connecting to profile {}".format(cluster_profile))
view = Client(profile=cluster_profile)[:]
view.use_dill()
gains = np.arange(3)
cells = np.arange(max_cells)
use_dir_creation_date = not use_now_as_creation_date
QUADRANTS = 4
MODULES_PER_QUAD = 4
DET_FILE_INSET = "LPD"
CHUNK_SIZE = 512
MAX_PAR = 32
out_folder = "{}/r{:04d}".format(out_folder, run)
print("Outputting to {}".format(out_folder))
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
elif not overwrite:
    raise AttributeError("Output path exists! Exiting")

creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
else:
    creation_time = datetime.now()
print("Using {} as creation time".format(creation_time.isoformat()))
in_folder = "{}/r{:04d}".format(in_folder, run)
run, proposal, seq = run_prop_seq_from_path(in_folder)
logger = InfluxLogger(detector="LPD", instrument=instrument, mem_cells=mem_cells,
notebook=get_notebook_name(), proposal=proposal)
client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')
```
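%% Cell type:markdown id: tags:
As noted in the comment above, the correction relies on an `ipyparallel` cluster started beforehand with `ipcluster start --n=32`. A minimal sketch (illustrative only) of how to check that engines are available before launching the correction:
%% Cell type:code id: tags:
``` python
# Illustrative check only: list the engines registered with the cluster profile.
# If no engines show up, start the cluster first with:
#   ipcluster start --n=32 --profile=<cluster_profile>
rc = Client(profile=cluster_profile)
print("{} engines available: {}".format(len(rc.ids), rc.ids))
```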
%% Cell type:code id: tags:
``` python
# set everything up filewise
from queue import Queue
from collections import OrderedDict
#if not os.path.exists(out_folder):
# os.makedirs(out_folder)
#elif not overwrite:
# raise AttributeError("Output path exists! Exiting")
def map_modules_from_files(filelist):
    """Group the sequence files by LPD module (Q1M1 ... Q4M4)."""
    module_files = OrderedDict()
    mod_ids = OrderedDict()
    total_sequences = 0
    sequences_qm = {}
    for quadrant in range(0, QUADRANTS):
        for module in range(0, MODULES_PER_QUAD):
            name = "Q{}M{}".format(quadrant + 1, module + 1)
            module_files[name] = Queue()
            num = quadrant * 4 + module
            mod_ids[name] = num
            file_infix = "{}{:02d}".format(DET_FILE_INSET, num)
            sequences_qm[name] = 0
            for file in filelist:
                if file_infix in file:
                    module_files[name].put(file)
                    total_sequences += 1
                    sequences_qm[name] += 1
    return module_files, mod_ids, total_sequences, sequences_qm

dirlist = sorted(os.listdir(in_folder))
file_list = []
for entry in dirlist:
    # only use .h5 files
    abs_entry = "{}/{}".format(in_folder, entry)
    if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == ".h5":
        if sequences is None:
            file_list.append(abs_entry)
        else:
            for seq in sequences:
                if "{:05d}.h5".format(seq) in abs_entry:
                    file_list.append(os.path.abspath(abs_entry))
mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)
MAX_PAR = min(MAX_PAR, total_sequences)
```
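%% Cell type:markdown id: tags:
`map_modules_from_files` relies on the LPD file-naming convention, in which the module number appears as `LPD00` ... `LPD15` in the file name. A small synthetic example of the resulting grouping (illustrative only, with made-up file names following that convention):
%% Cell type:code id: tags:
``` python
# Illustrative only: synthetic file names containing the LPD module infix.
example_files = ["RAW-R0115-LPD{:02d}-S{:05d}.h5".format(m, s)
                 for m in range(QUADRANTS * MODULES_PER_QUAD)
                 for s in range(2)]
ex_files, ex_ids, ex_total, ex_seqs = map_modules_from_files(example_files)
print("Total sequences: {}".format(ex_total))          # 32 = 16 modules x 2 sequences
print("Q1M1 files: {}".format(list(ex_files["Q1M1"].queue)))
```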
%% Cell type:markdown id: tags:
## Processed Files ##
%% Cell type:code id: tags:
``` python
import copy
from IPython.display import HTML, display, Markdown, Latex
import tabulate
print("Processing a total of {} sequence files in chunks of {}".format(total_sequences, MAX_PAR))
table = []
mfc = copy.copy(mapped_files)
ti = 0
for k, files in mfc.items():
    i = 0
    while not files.empty():
        f = files.get()
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i, f))
        i += 1
        ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
# restore the queue
mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)
```
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def correct_module(max_cells, do_ff, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
                   bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range, max_pulses,
                   dbparms, fileparms, nodb, no_non_linear_corrections, inp):
    """Correct a single sequence file of one LPD module; executed on a cluster engine."""
    import numpy as np
    import copy
    import h5py
    import socket
    from datetime import datetime
    import re
    import os
    from influxdb import InfluxDBClient
    from cal_tools.enums import BadPixels
    from cal_tools.lpdlib import LpdCorrections

    client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')

    def create_influx_entry(run, proposal, qm, sequence, filesize, chunksize,
                            total_sequences, success, runtime, reason=""):
        return {
            "measurement": "run_correction",
            "tags": {
                "host": socket.gethostname(),
                "run": run,
                "proposal": proposal,
                "mem_cells": max_cells,
                "sequence": sequence,
                "module": qm,
                "filesize": filesize,
                "chunksize": chunksize,
                "total_sequences": total_sequences,
                "sequences_module": sequences_qm[qm],
                "detector": "LPD",
                "instrument": "FXE",
            },
            "time": datetime.utcnow().isoformat(),
            "fields": {
                "success": success,
                "reason": reason,
                "runtime": runtime,
            }
        }

    hists_signal_low = None
    hists_signal_high = None
    hists_gain_vs_signal = None
    low_edges = None
    high_edges = None
    signal_edges = None
    when = None
    qm = None
    try:
        start = datetime.now()
        success = True
        reason = ""
        filename, filename_out, channel, qm = inp
        infile = h5py.File(filename, "r", driver="core")
        outfile = h5py.File(filename_out, "w")
        lpd_corr = LpdCorrections(infile, outfile, max_cells, channel, max_pulses,
                                  bins_gain_vs_signal, bins_signal_low_range,
                                  bins_signal_high_range, do_ff=do_ff, raw_fmt_version=index_v,
                                  correct_non_linear=(not no_non_linear_corrections))
        try:
            lpd_corr.get_valid_image_idx()
        except IOError:
            return
        if not nodb:
            when = lpd_corr.initialize_from_db(dbparms, qm, only_dark=(fileparms != ""))
        if fileparms != "":
            lpd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)
        print("Initialized constants")
        for irange in lpd_corr.get_iteration_range():
            lpd_corr.correct_lpd(irange)
        print("All iterations finished")
        hists, edges = lpd_corr.get_histograms()
        hists_signal_low, hists_signal_high, hists_gain_vs_signal = hists
        low_edges, high_edges, signal_edges = edges
        outfile.close()
        infile.close()
        print("Closed files")
    except Exception as e:
        print(e)
        success = False
        reason = "Error"
    finally:
        run = re.findall(r'.*r([0-9]{4}).*', filename)[0]
        proposal = re.findall(r'.*p([0-9]{6}).*', filename)[0]
        sequence = re.findall(r'.*S([0-9]{5}).*', filename)[0]
        filesize = os.path.getsize(filename)
        duration = (datetime.now()-start).total_seconds()
        influx = create_influx_entry(run, proposal, qm, sequence, filesize, CHUNK_SIZE,
                                     total_sequences, success, duration, reason)
        #client.write_points([influx])
    return hists_signal_low, hists_signal_high, hists_gain_vs_signal, low_edges, high_edges, signal_edges, when, qm
done = False
first_files = []
inp = []
left = total_sequences
bins_gain_vs_signal = (100, 4)
bins_signal_low_range = 100
bins_signal_high_range = 100
hists_signal_low = np.zeros((bins_signal_low_range, max_pulses), np.float64)
hists_signal_high = np.zeros((bins_signal_high_range, max_pulses), np.float64)
hists_gain_vs_signal = np.zeros((bins_gain_vs_signal), np.float64)
low_edges, high_edges, signal_edges = None, None, None
dbparms = cal_db_interface, creation_time, max_cells_db, capacitor, bias_voltage, photon_energy, timeout_cal_db
fileparms = calfile
whens = {}
while not done:
    dones = []
    first = True
    for i in range(16):
        qm = "Q{}M{}".format(i//4 + 1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = str(mapped_files[qm].get())
            dones.append(mapped_files[qm].empty())
        else:
            print("Skipping {}".format(qm))
            first_files.append((None, None))
            continue
        fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
        if first:
            first_files.append((fname_in, fout))
        inp.append((fname_in, fout, i, qm))
    first = False
    if len(inp) >= min(MAX_PAR, left):
        print("Running {} tasks in parallel".format(len(inp)))
        p = partial(correct_module, max_cells, do_ff, index_v, CHUNK_SIZE, total_sequences, sequences_qm,
                    bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range, max_pulses, dbparms,
                    fileparms, nodb, no_non_linear_corrections)
        r = view.map_sync(p, inp)
        #r = list(map(p, inp))
        inp = []
        left -= MAX_PAR
        for rr in r:
            if rr is not None:
                hl, hh, hg, low_edges, high_edges, signal_edges, when, qm = rr
                whens[qm] = when
                if hl is not None:  # any one being None will also make the others None
                    hists_signal_low += hl.astype(np.float64)
                    hists_signal_high += hh.astype(np.float64)
                    hists_gain_vs_signal += hg.astype(np.float64)
    done = all(dones)
```
%% Cell type:code id: tags:
``` python
print("Offset where injected on: ")
for qm, when in whens.items():
print("{}: {}".format(qm, when))
```
%% Cell type:code id: tags:
``` python
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np
%matplotlib inline
def do_3d_plot(data, edges, x_axis, y_axis):
    fig = plt.figure(figsize=(10,10))
    ax = fig.gca(projection='3d')
    # Make data.
    X = edges[0][:-1]
    Y = edges[1][:-1]
    X, Y = np.meshgrid(X, Y)
    Z = data.T
    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")
```
%% Cell type:markdown id: tags:
## Signal vs. Analogue Gain ##
The following plot shows signal versus analogue gain for the first 1280 images.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Bit Value")
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis):
    from matplotlib.colors import LogNorm
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    extent = [np.min(edges[1]), np.max(edges[1]), np.min(edges[0]), np.max(edges[0])]
    im = ax.imshow(data[::-1,:], extent=extent, aspect="auto", norm=LogNorm(vmin=1, vmax=np.max(data)))
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")

do_2d_plot(hists_gain_vs_signal, signal_edges, "Signal (ADU)", "Gain Bit Value")
```
%% Cell type:markdown id: tags:
## Mean Intensity per Pulse ##
The following plots show the mean signal for each pulse, both in a detailed (low-intensity) and in an expanded (high-intensity) signal range.
%% Cell type:code id: tags:
``` python
do_3d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_low, low_edges, "Signal (ADU)", "Pulse id")
do_3d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
do_2d_plot(hists_signal_high, high_edges, "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
## Data Preview ##
In the following, the geometry information from the LPD geometry file is applied: quadrants are positioned at their last known positions. No bad-pixel masking has been performed.
%% Cell type:code id: tags:
``` python
# geometry information
dc = beam_center_offset
#d_quads = [(-14+dc[0],-300+dc[1]),(10+dc[0],-9+dc[1]),(-256+dc[0],15+dc[1]),(-280+dc[0],-276+dc[1])] # MAR 2018
d_quads = [(-19+dc[0],-300+dc[1]),(10+dc[0],-9+dc[1]),(-256+dc[0],19+dc[1]),(-285+dc[0],-271+dc[1])] # MAY 2019
import cal_tools.metrology as metro
in_files = "{}/CORR*LPD*S{:05d}*.h5".format(out_folder, sequences[1] if sequences else 1)
in_files = "{}/CORR*LPD*S{:05d}*.h5".format(out_folder, sequences[0] if sequences else 0)
datapath = "INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/data"
print("Preview is from {}".format(in_files))
```
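%% Cell type:markdown id: tags:
The `{}` placeholder in `datapath` is filled with the module/channel number when the files are read, so each LPD module is addressed by its own HDF5 dataset path. A worked example of the resulting paths (illustrative only):
%% Cell type:code id: tags:
``` python
# Illustrative only: the placeholder is substituted with the module number,
# e.g. module 7 maps to .../DET/7CH0:xtdf/image/data
for ch in (0, 7, 15):
    print(datapath.format(ch))
```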
%% Cell type:code id: tags:
``` python
posarr = metro.positionFileList(in_files, datapath, geometry_file, d_quads, nImages = 10)
datapath = "INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/mask"
maskedarr = metro.positionFileList(in_files, datapath, geometry_file, d_quads, nImages = 10)
```
%% Cell type:code id: tags:
``` python
# convert the Cartesian coordinates of the detector to polar coordinates
def mod_cart_to_pol(d, dx, dy, filter_by_val=True):
    """ Convert Cartesian coords to polar coords
    """
    cx, cy = d.shape
    x = np.arange(cx)+dx
    y = np.arange(cy)+dy
    x = np.repeat(x[:,None], cy, axis=1)
    y = np.repeat(y[None,:], cx, axis=0)
    rho = np.sqrt(x**2 + y**2).flatten()
    phi = np.arctan2(y, x).flatten()
    flat = d.flatten()
    # we also perform a bit of filtering here
    if filter_by_val:
        good = np.isfinite(flat) & (flat > 0) & (flat < 1e5)
        return rho[good], phi[good], flat[good], good
    return rho, phi, flat, None
```
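%% Cell type:markdown id: tags:
A minimal sketch of the conversion on a small synthetic array (illustrative only): each pixel position, shifted by `dx`/`dy` so that the origin coincides with the beam position, is mapped to a radius `rho` and an azimuthal angle `phi`.
%% Cell type:code id: tags:
``` python
# Illustrative only: convert a synthetic 4x4 "module" to polar coordinates
# without filtering, so all 16 pixels are returned.
demo = np.ones((4, 4))
rho_d, phi_d, flat_d, _ = mod_cart_to_pol(demo, -2, -2, filter_by_val=False)
print(rho_d.reshape(4, 4))   # radial distance of each pixel from the origin
print(phi_d.reshape(4, 4))   # azimuthal angle in radians
```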
%% Cell type:markdown id: tags:
### Single Shot Preview ###
A single shot image from cell 5 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = posarr[5,...]
im=ax.imshow((parr), vmin=0, vmax=max(10*np.median(parr[parr > 0]), 100))
cb = fig.colorbar(im)
cb.set_label("Intensity (ADU")
```
%% Cell type:markdown id: tags:
### Pixel Mean Preview ###
The per-pixel mean value of the preview images loaded above
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = np.mean(posarr, axis=0)
im=ax.imshow((parr), vmin=0, vmax=max(10*np.median(parr[parr > 0]), 100))
cb = fig.colorbar(im)
cb.set_label("Intensity (ADU")
```
%% Cell type:markdown id: tags:
### Radial Profile ###
The simple azimuthally integrated profile shown below assumes the beam to be centred in the detector hole; it is therefore not always fully accurate.
%% Cell type:code id: tags:
``` python
# Here we create histograms of the data in a polar coordinate system.
# We use scipy's binned_statistic_2d function, giving it the polar coordinates
# of each pixel and weighting each coordinate with the pixel's value.
# The resulting histogram and its bin edges are collected in the lists below.
from scipy.stats import binned_statistic_2d
hs = []
bins_nums = []
edges = []
goods = []
bins = 5000
dx, dy = -750, -750
rho, phi, weights, good = mod_cart_to_pol(np.mean(posarr, axis=0), dy, dx, False)
#h, phi_edges, rho_edges = np.histogram2d(phi, rho, bins=(1000,1000),
# range=((-np.pi, np.pi), (0, 1000)),
# weights=weights)
h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins, bins),
                                                   range=((-np.pi, np.pi), (0, 1000)),
                                                   statistic="sum")
bins_nums.append(bns)
hs.append(h)
edges.append((phi_edges, rho_edges))
goods.append(good)
```
%% Cell type:code id: tags:
``` python
x = np.arange(bins)/bins*1000*500e-6
y = np.arange(bins)/bins*2.
ds = np.array(hs).sum(axis=0)
```
%% Cell type:code id: tags:
``` python
# With appropriate coordinates given, plotting a profile along the
# vertical axis should give us the positions of the diffraction peaks,
# here still as distances on the detector plane. With knowledge of the
# sample-to-detector distance, these could then be converted into
# reciprocal-space coordinates.
ds[ds == 0] = np.nan
profile = np.nanmedian(ds, axis=0)
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(111)
p = ax.plot(x, profile)
l =ax.set_ylabel("Median intensity (arb. units)")
l = ax.set_xlabel("Radial distance (arb. units)")
```
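%% Cell type:markdown id: tags:
As mentioned in the comment above, converting the radial positions into reciprocal-space coordinates requires the sample-to-detector distance and the wavelength. A hedged sketch of that conversion (the distance below is a made-up placeholder, not a calibrated value for this run):
%% Cell type:code id: tags:
``` python
# Illustrative only: convert radial distance on the detector to momentum transfer q.
# sample_det_distance is a hypothetical placeholder; use the measured value in practice.
sample_det_distance = 0.3                        # metres (assumed)
wavelength = 12.3984 / photon_energy * 1e-10     # E [keV] -> lambda [m]
two_theta = np.arctan(x / sample_det_distance)   # scattering angle from radial distance x [m]
q = 4 * np.pi * np.sin(two_theta / 2) / wavelength   # momentum transfer in 1/m
```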
%% Cell type:markdown id: tags:
## Maximum Gain Value Reached ##
The following plot shows the maximum gain value reached. It can be used as an indication of whether the detector went into saturation.
%% Cell type:code id: tags:
``` python
datapath = "INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/gain"
posarr = metro.positionFileList(in_files, datapath, geometry_file, d_quads, nImages = 100)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111)
parr = np.max(posarr, axis=0)
im=ax.imshow((parr), vmin=0, vmax=3)
cb = fig.colorbar(im)
cb.set_label("Intensity (ADU")
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```