Commit 5cf97c95, authored 2 years ago by Karim Ahmed

fixes after testing with reference run and remove unneeded parameters in 1st cell
Parent: 99ba2406
Merge request !705: [Reproducability][LPD][CORRECT] Retrieve constants precorrection notebook.
Showing 2 changed files with 27 additions and 44 deletions:

- notebooks/LPD/LPD_Correct_Fast.ipynb: 9 additions, 9 deletions
- notebooks/LPD/LPD_retrieve_constants_precorrection.ipynb: 18 additions, 35 deletions
notebooks/LPD/LPD_Correct_Fast.ipynb (+9 −9)
```diff
@@ -35,7 +35,7 @@
 "output_source = '' # Output fast data source, empty to use same as input.\n",
 "\n",
 "# CalCat parameters\n",
-"use_dir_creation_date = True # Use the creation date of the directory for database time derivation.\n",
+"creation_time = \"\" # The timestamp to use with Calibration DBe. Required Format: \"YYYY-MM-DD hh:mm:ss\" e.g. 2019-07-04 11:02:41\n",
 "cal_db_interface = '' # Not needed, compatibility with current webservice.\n",
 "cal_db_timeout = 0 # Not needed, compatbility with current webservice.\n",
 "cal_db_root = '/gpfs/exfel/d/cal/caldb_store'\n",
```
```diff
@@ -105,7 +105,12 @@
 "from extra_data.components import LPD1M\n",
 "\n",
 "from cal_tools.lpdalgs import correct_lpd_frames\n",
-"from cal_tools.tools import CalibrationMetadata, get_dir_creation_date, write_compressed_frames\n",
+"from cal_tools.tools import (\n",
+"    CalibrationMetadata,\n",
+"    calcat_creation_time,\n",
+"    get_dir_creation_date,\n",
+"    write_compressed_frames,\n",
+")\n",
 "from cal_tools.files import DataFile\n",
 "from cal_tools.restful_config import restful_config"
```
```diff
@@ -135,12 +140,7 @@
 "\n",
 "metadata = CalibrationMetadata(metadata_folder or out_folder)\n",
 "\n",
-"if use_dir_creation_date:\n",
-"    creation_time = get_dir_creation_date(in_folder, run) \n",
-"else:\n",
-"    from datetime import datetime\n",
-"    creation_time = datetime.now()\n",
-"    \n",
+"creation_time = calcat_creation_time(in_folder, run, creation_time)\n",
 "print(f'Using {creation_time.isoformat()} as creation time')\n",
 "\n",
 "# Pick all modules/aggregators or those selected.\n",
```
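The hunks above replace the old `use_dir_creation_date` branching with a single call to the new `cal_tools.tools.calcat_creation_time` helper. Its implementation is not part of this diff; judging from the parameter comments, an empty `creation_time` string presumably falls back to the raw directory's creation date, roughly:

```python
# Sketch only, assuming the behaviour implied by the parameter comments;
# the real helper lives in cal_tools.tools and may differ.
from datetime import datetime

from cal_tools.tools import get_dir_creation_date


def calcat_creation_time_sketch(in_folder, run, creation_time=""):
    if creation_time:
        # An explicit "YYYY-MM-DD hh:mm:ss" timestamp was given.
        return datetime.strptime(creation_time, "%Y-%m-%d %H:%M:%S")
    # Otherwise derive the time from the raw run directory, as before.
    return get_dir_creation_date(in_folder, run)
```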
```diff
@@ -242,7 +242,7 @@
 "    start = perf_counter()\n",
 "    for da, ccvs in const_yaml.items():\n",
 "\n",
-"        for calibration_name, ccv in ccvs.items():\n",
+"        for calibration_name, ccv in ccvs['constants'].items():\n",
 "\n",
 "            dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32\n",
 "\n",
```
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Data Analysis Group
%% Cell type:code id: tags:
```python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/schmidtp/random/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [-1] # Sequences to correct, use [-1] for all
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 10 # runs to process, required

# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
output_source = '' # Output fast data source, empty to use same as input.

# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DBe. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
cal_db_interface = '' # Not needed, compatibility with current webservice.
cal_db_timeout = 0 # Not needed, compatbility with current webservice.
cal_db_root = '/gpfs/exfel/d/cal/caldb_store'

# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.

# Correction parameters
offset_corr = True # Offset correction.
rel_gain = True # Gain correction based on RelativeGain constant.
ff_map = True # Gain correction based on FFMap constant.
gain_amp_map = True # Gain correction based on GainAmpMap constant.

# Output options
overwrite = True # set to True if existing data should be overwritten
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
chunks_ids = 32 # HDF chunk size for cellId and pulseId datasets.
create_virtual_cxi_in = '' # Folder to create virtual CXI files in (for each sequence).

# Parallelization options
sequences_per_node = 1 # Sequence files to process per node
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
num_workers = 8 # Worker processes per node, 8 is safe on 768G nodes but won't work on 512G.
num_threads_per_worker = 32 # Number of threads per worker.


def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
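For context, `balance_sequences` in the parameter cell above only forwards to `xfel_calibrate.calibrate.balance_sequences`; when the notebook is launched through xfel-calibrate, the framework presumably evaluates it to split the requested sequences across up to `max_nodes` SLURM jobs. A hypothetical direct call:

```python
# Hypothetical direct invocation for illustration; under xfel-calibrate
# the framework calls this itself to partition work across SLURM jobs.
seq_groups = balance_sequences(
    in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes)
```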
%% Cell type:code id: tags:
```python
from collections import OrderedDict
from pathlib import Path
from time import perf_counter
import gc
import re
import warnings

import numpy as np
import h5py

import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline

from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
import extra_geom as xg
import pasha as psh

from extra_data.components import LPD1M

from cal_tools.lpdalgs import correct_lpd_frames
from cal_tools.tools import (
    CalibrationMetadata,
    calcat_creation_time,
    get_dir_creation_date,
    write_compressed_frames,
)
from cal_tools.files import DataFile
from cal_tools.restful_config import restful_config
```
%% Cell type:markdown id: tags:
# Prepare environment
%% Cell type:code id: tags:
```python
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$')  # This should probably move to cal_tools

run_folder = Path(in_folder) / f'r{run:04d}'
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)

output_source = output_source or input_source

cal_db_root = Path(cal_db_root)

metadata = CalibrationMetadata(metadata_folder or out_folder)

creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')

# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
    if not modules or modules == [-1]:
        modules = list(range(16))

    karabo_da = [f'LPD{i:02d}' for i in modules]

# Pick all sequences or those selected.
if not sequences or sequences == [-1]:
    do_sequence = lambda seq: True
else:
    do_sequence = [int(x) for x in sequences].__contains__

# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
```
%% Cell type:markdown id: tags:
# Select data to process
%% Cell type:code id: tags:
```python
data_to_process = []

for inp_path in run_folder.glob('RAW-*.h5'):
    match = file_re.match(inp_path.stem)

    if match[2] not in karabo_da or not do_sequence(int(match[3])):
        continue

    outp_path = out_folder / 'CORR-R{run:04d}-{aggregator}-S{seq:05d}.h5'.format(
        run=int(match[1]), aggregator=match[2], seq=int(match[3]))

    data_to_process.append((match[2], inp_path, outp_path))

print('Files to process:')
for data_descr in sorted(data_to_process, key=lambda x: f'{x[0]}{x[1]}'):
    print(f'{data_descr[0]}\t{data_descr[1]}')
```
%% Cell type:markdown id: tags:
# Obtain and prepare calibration constants
%% Cell type:code id: tags:
```python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
    base_api_url=calcat_config['base-api-url'],
    use_oauth2=calcat_config['use-oauth2'],
    client_id=calcat_config['user-id'],
    client_secret=calcat_config['user-secret'],
    user_email=calcat_config['user-email'],
    token_url=calcat_config['token-url'],
    refresh_url=calcat_config['refresh-url'],
    auth_url=calcat_config['auth-url'],
    scope='')
```
%% Cell type:code id: tags:
```python
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
const_yaml = metadata.setdefault("retrieved-constants", {})
```
%% Cell type:code id: tags:
```python
const_data = {}
const_load_mp = psh.ProcessContext(num_workers=24)

if const_yaml:  # Read constants from YAML file.
    start = perf_counter()
    for da, ccvs in const_yaml.items():

        for calibration_name, ccv in ccvs['constants'].items():

            dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32

            const_data[(da, calibration_name)] = dict(
                path=Path(ccv['file-path']),
                dataset=ccv['dataset-name'],
                data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
            )
else:  # Retrieve constants from CALCAT.
    dark_calibrations = {
        1: 'Offset',  # np.float32
        14: 'BadPixelsDark'  # should be np.uint32, but is np.float64
    }

    dark_condition = [
        dict(parameter_id=1, value=bias_voltage),  # Sensor bias voltage
        dict(parameter_id=7, value=mem_cells),  # Memory cells
        dict(parameter_id=15, value=capacitor),  # Feedback capacitor
        dict(parameter_id=13, value=256),  # Pixels X
        dict(parameter_id=14, value=256),  # Pixels Y
    ]

    illuminated_calibrations = {
        20: 'BadPixelsFF',  # np.uint32
        42: 'GainAmpMap',  # np.float32
        43: 'FFMap',  # np.float32
        44: 'RelativeGain'  # np.float32
    }

    illuminated_condition = dark_condition.copy()
    illuminated_condition += [
        dict(parameter_id=3, value=photon_energy),  # Source energy
        dict(parameter_id=25, value=category)  # category
    ]

    print('Querying calibration database', end='', flush=True)
    start = perf_counter()

    for calibrations, condition in [
        (dark_calibrations, dark_condition),
        (illuminated_calibrations, illuminated_condition)
    ]:
        resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
            client, karabo_id, list(calibrations.keys()),
            {'parameters_conditions_attributes': condition},
            karabo_da='', event_at=creation_time.isoformat(), snapshot_at=None)

        if not resp['success']:
            raise RuntimeError(resp)

        for ccv in resp['data']:
            cc = ccv['calibration_constant']
            da = ccv['physical_detector_unit']['karabo_da']
            calibration_name = calibrations[cc['calibration_id']]

            dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32

            const_data[(da, calibration_name)] = dict(
                path=Path(ccv['path_to_file']) / ccv['file_name'],
                dataset=ccv['data_set_name'],
                data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
            )

        print('.', end='', flush=True)

total_time = perf_counter() - start
print(f'{total_time:.1f}s')
```
%% Cell type:code id: tags:
```python
def load_constant_dataset(wid, index, const_descr):
    ccv_entry = const_data[const_descr]

    with h5py.File(cal_db_root / ccv_entry['path'], 'r') as fp:
        fp[ccv_entry['dataset'] + '/data'].read_direct(ccv_entry['data'])

    print('.', end='', flush=True)

print('Loading calibration data', end='', flush=True)
start = perf_counter()

const_load_mp.map(load_constant_dataset, list(const_data.keys()))

total_time = perf_counter() - start
print(f'{total_time:.1f}s')
```
%% Cell type:code id: tags:
```python
# These are intended in order cell, X, Y, gain
ccv_offsets = {}
ccv_gains = {}
ccv_masks = {}

ccv_shape = (mem_cells, 256, 256, 3)

constant_order = {
    'Offset': (2, 1, 0, 3),
    'BadPixelsDark': (2, 1, 0, 3),
    'RelativeGain': (2, 1, 0, 3),
    'FFMap': (2, 0, 1, 3),
    'BadPixelsFF': (2, 0, 1, 3),
    'GainAmpMap': (2, 0, 1, 3),
}

def prepare_constants(wid, index, aggregator):
    consts = {calibration_name: entry['data']
              for (aggregator_, calibration_name), entry in const_data.items()
              if aggregator == aggregator_}

    def _prepare_data(calibration_name, dtype):
        return consts[calibration_name] \
            .transpose(constant_order[calibration_name]) \
            .astype(dtype, copy=True)  # Make sure array is contiguous.

    if offset_corr and 'Offset' in consts:
        ccv_offsets[aggregator] = _prepare_data('Offset', np.float32)
    else:
        ccv_offsets[aggregator] = np.zeros(ccv_shape, dtype=np.float32)

    ccv_gains[aggregator] = np.ones(ccv_shape, dtype=np.float32)

    if 'BadPixelsDark' in consts:
        ccv_masks[aggregator] = _prepare_data('BadPixelsDark', np.uint32)
    else:
        ccv_masks[aggregator] = np.zeros(ccv_shape, dtype=np.uint32)

    if rel_gain and 'RelativeGain' in consts:
        ccv_gains[aggregator] *= _prepare_data('RelativeGain', np.float32)

    if ff_map and 'FFMap' in consts:
        ccv_gains[aggregator] *= _prepare_data('FFMap', np.float32)

        if 'BadPixelsFF' in consts:
            np.bitwise_or(ccv_masks[aggregator], _prepare_data('BadPixelsFF', np.uint32),
                          out=ccv_masks[aggregator])

    if gain_amp_map and 'GainAmpMap' in consts:
        ccv_gains[aggregator] *= _prepare_data('GainAmpMap', np.float32)

    print('.', end='', flush=True)

print('Preparing constants', end='', flush=True)
start = perf_counter()
psh.ThreadContext(num_workers=len(karabo_da)).map(prepare_constants, karabo_da)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')

const_data.clear()  # Clear raw constants data now to save memory.
gc.collect();
```
%% Cell type:code id: tags:
```python
def correct_file(wid, index, work):
    aggregator, inp_path, outp_path = work
    module_index = int(aggregator[-2:])

    start = perf_counter()
    dc = xd.H5File(inp_path, inc_suspect_trains=False).select('*', 'image.*', require_all=True)
    inp_source = dc[input_source.format(karabo_id=karabo_id, module_index=module_index)]
    open_time = perf_counter() - start

    # Load raw data for this file.
    # Reshaping gets rid of the extra 1-len dimensions without
    # mangling the frame axis for an actual frame count of 1.
    start = perf_counter()
    in_raw = inp_source['image.data'].ndarray().reshape(-1, 256, 256)
    in_cell = inp_source['image.cellId'].ndarray().reshape(-1)
    in_pulse = inp_source['image.pulseId'].ndarray().reshape(-1)
    read_time = perf_counter() - start

    # Allocate output arrays.
    out_data = np.zeros((in_raw.shape[0], 256, 256), dtype=np.float32)
    out_gain = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint8)
    out_mask = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint32)

    start = perf_counter()
    correct_lpd_frames(in_raw, in_cell,
                       out_data, out_gain, out_mask,
                       ccv_offsets[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
                       num_threads=num_threads_per_worker)
    correct_time = perf_counter() - start

    image_counts = inp_source['image.data'].data_counts(labelled=False)

    start = perf_counter()
    if (not outp_path.exists() or overwrite) and image_counts.sum() > 0:
        fa = dc.files[0]
        sel_trains = np.isin(fa.train_ids, dc.train_ids)

        outp_source_name = output_source.format(karabo_id=karabo_id, module_index=module_index)

        with DataFile(outp_path, 'w') as outp_file:
            outp_file.create_index(
                train_ids=dc.train_ids,
                timestamps=fa.file['INDEX/timestamp'][sel_trains],
                flags=fa.validity_flag[sel_trains])
            outp_file.create_metadata(like=dc, instrument_channels=(f'{outp_source_name}/image',))

            outp_source = outp_file.create_instrument_source(outp_source_name)

            outp_source.create_index(image=image_counts)
            outp_source.create_key('image.cellId', data=in_cell,
                                   chunks=(min(chunks_ids, in_cell.shape[0]),))
            outp_source.create_key('image.pulseId', data=in_pulse,
                                   chunks=(min(chunks_ids, in_pulse.shape[0]),))
            outp_source.create_key('image.data', data=out_data,
                                   chunks=(min(chunks_data, out_data.shape[0]), 256, 256))
            write_compressed_frames(
                out_gain, outp_file, f'INSTRUMENT/{outp_source_name}/image/gain', comp_threads=8)
            write_compressed_frames(
                out_mask, outp_file, f'INSTRUMENT/{outp_source_name}/image/mask', comp_threads=8)

    write_time = perf_counter() - start

    total_time = open_time + read_time + correct_time + write_time
    frame_rate = in_raw.shape[0] / total_time

    print('{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}\t{:.1f}'.format(
        wid, aggregator, open_time, read_time, correct_time, write_time, total_time,
        in_raw.shape[0], frame_rate))

    in_raw = None
    in_cell = None
    in_pulse = None
    out_data = None
    out_gain = None
    out_mask = None
    gc.collect()

print('worker\tDA\topen\tread\tcorrect\twrite\ttotal\tframes\trate')
start = perf_counter()

psh.ProcessContext(num_workers=num_workers).map(correct_file, data_to_process)

total_time = perf_counter() - start
print(f'Total time: {total_time:.1f}s')
```
%% Cell type:markdown id: tags:
# Data preview for first train
%% Cell type:code id: tags:
```python
geom = xg.LPD_1MGeometry.from_quad_positions(
    [(11.4, 299), (-11.5, 8), (254.5, -16), (278.5, 275)])

output_paths = [outp_path for _, _, outp_path in data_to_process if outp_path.exists()]
dc = xd.DataCollection.from_paths(output_paths).select_trains(np.s_[0])

det = LPD1M(dc, detector_name=karabo_id)
data = det.get_array('image.data')
```
%% Cell type:markdown id: tags:
### Intensity histogram across all cells
%% Cell type:code id: tags:
```python
left_edge_ratio = 0.01
right_edge_ratio = 0.99

fig, ax = plt.subplots(num=1, clear=True, figsize=(15, 6))
values, bins, _ = ax.hist(np.ravel(data.data), bins=2000, range=(-1500, 2000))

def find_nearest_index(array, value):
    return (np.abs(array - value)).argmin()

cum_values = np.cumsum(values)
vmin = bins[find_nearest_index(cum_values, cum_values[-1]*left_edge_ratio)]
vmax = bins[find_nearest_index(cum_values, cum_values[-1]*right_edge_ratio)]

max_value = values.max()
ax.vlines([vmin, vmax], 0, max_value, color='red', linewidth=5, alpha=0.2)
ax.text(vmin, max_value, f'{left_edge_ratio*100:.0f}%',
        color='red', ha='center', va='bottom', size='large')
ax.text(vmax, max_value, f'{right_edge_ratio*100:.0f}%',
        color='red', ha='center', va='bottom', size='large')
ax.text(vmax+(vmax-vmin)*0.01, max_value/2, 'Colormap interval',
        color='red', rotation=90, ha='left', va='center', size='x-large')

ax.set_xlim(vmin-(vmax-vmin)*0.1, vmax+(vmax-vmin)*0.1)
ax.set_ylim(0, max_value*1.1)
pass
```
%% Cell type:markdown id: tags:
### First memory cell
%% Cell type:code id: tags:
```python
fig, ax = plt.subplots(num=2, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Train average
%% Cell type:code id: tags:
```python
fig, ax = plt.subplots(num=3, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Lowest gain stage per pixel
%% Cell type:code id: tags:
```python
highest_gain_stage = det.get_array('image.gain', pulses=np.s_[:]).max(axis=(1, 2))

fig, ax = plt.subplots(num=4, figsize=(15, 15), clear=True, nrows=1, ncols=1)
p = geom.plot_data_fast(highest_gain_stage, ax=ax, vmin=0, vmax=2);

cb = ax.images[0].colorbar
cb.set_ticks([0, 1, 2])
cb.set_ticklabels(['High gain', 'Medium gain', 'Low gain'])
```
%% Cell type:markdown id: tags:
### Create virtual CXI file
%% Cell type:code id: tags:
```python
if create_virtual_cxi_in:
    vcxi_folder = Path(create_virtual_cxi_in.format(
        run=run, proposal_folder=str(Path(in_folder).parent)))
    vcxi_folder.mkdir(parents=True, exist_ok=True)

    def sort_files_by_seq(by_seq, outp_path):
        by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)
        return by_seq

    from functools import reduce
    reduce(sort_files_by_seq, output_paths, output_by_seq := {})

    for seq_number, seq_output_paths in output_by_seq.items():
        # Create data collection and detector components only for this sequence.
        try:
            det = LPD1M(xd.DataCollection.from_paths(seq_output_paths),
                        detector_name=karabo_id, min_modules=4)
        except ValueError:  # Couldn't find enough data for min_modules
            continue

        det.write_virtual_cxi(vcxi_folder / f'VCXI-LPD-R{run:04d}-S{seq_number:05d}.cxi')
```
notebooks/LPD/LPD_retrieve_constants_precorrection.ipynb (+18 −35)
```diff
@@ -32,7 +32,7 @@
 "output_source = '' # Output fast data source, empty to use same as input.\n",
 "\n",
 "# CalCat parameters\n",
-"use_dir_creation_date = True # Use the creation date of the directory for database time derivation.\n",
+"creation_time = \"\" # The timestamp to use with Calibration DBe. Required Format: \"YYYY-MM-DD hh:mm:ss\" e.g. 2019-07-04 11:02:41\n",
 "cal_db_root = '/gpfs/exfel/d/cal/caldb_store'\n",
 "\n",
 "# Operating conditions\n",
```
```diff
@@ -40,23 +40,7 @@
 "bias_voltage = 250.0 # Detector bias voltage.\n",
 "capacitor = '5pF' # Capacitor setting: 5pF or 50pF\n",
 "photon_energy = 9.2 # Photon energy in keV.\n",
-"category = 0 # Whom to blame.\n",
-"\n",
-"# Correction parameters\n",
-"offset_corr = True # Offset correction.\n",
-"rel_gain = True # Gain correction based on RelativeGain constant.\n",
-"ff_map = True # Gain correction based on FFMap constant.\n",
-"gain_amp_map = True # Gain correction based on GainAmpMap constant.\n",
-"\n",
-"# Output options\n",
-"overwrite = True # set to True if existing data should be overwritten\n",
-"chunks_data = 1 # HDF chunk size for pixel data in number of frames.\n",
-"chunks_ids = 32 # HDF chunk size for cellId and pulseId datasets.\n",
-"\n",
-"# Parallelization options\n",
-"max_nodes = 8 # Maximum number of SLURM jobs to split correction work into\n",
-"num_workers = 8 # Worker processes per node, 8 is safe on 768G nodes but won't work on 512G.\n",
-"num_threads_per_worker = 32 # Number of threads per worker."
+"category = 0 # Whom to blame."
 ]
 },
 {
```
```diff
@@ -80,6 +64,7 @@
 "\n",
 "from cal_tools.tools import (\n",
 "    CalibrationMetadata,\n",
+"    calcat_creation_time,\n",
 "    get_dir_creation_date,\n",
 "    save_constant_metadata,\n",
 "    write_compressed_frames,\n",
```
```diff
@@ -108,12 +93,7 @@
 "# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml\n",
 "retrieved_constants = metadata.setdefault(\"retrieved-constants\", {})\n",
 "\n",
-"if use_dir_creation_date:\n",
-"    creation_time = get_dir_creation_date(in_folder, run) \n",
-"else:\n",
-"    from datetime import datetime\n",
-"    creation_time = datetime.now()\n",
-"    \n",
+"creation_time = calcat_creation_time(in_folder, run, creation_time)\n",
 "print(f'Using {creation_time.isoformat()} as creation time')\n",
 "\n",
 "# Pick all modules/aggregators or those selected.\n",
```
```diff
@@ -160,7 +140,7 @@
 "outputs": [],
 "source": [
 "dark_calibrations = {\n",
-"    11: 'Offset',\n",
+"    1: 'Offset',\n",
 "    14: 'BadPixelsDark',\n",
 "}\n",
 "\n",
```
```diff
@@ -174,9 +154,9 @@
 "\n",
 "illuminated_calibrations = {\n",
 "    20: 'BadPixelsFF',\n",
-"    43: 'GainAmpMap',\n",
-"    41: 'FFMap',\n",
-"    42: 'RelativeGain',\n",
+"    42: 'GainAmpMap',\n",
+"    43: 'FFMap',\n",
+"    44: 'RelativeGain',\n",
 "}\n",
 "\n",
 "illuminated_condition = dark_condition.copy()\n",
```
```diff
@@ -209,17 +189,20 @@
 "        if not resp[\"success\"]:\n",
 "            print(f\"ERROR: Constants {list(calibrations.values())} \"\n",
 "                  f\"were not retrieved, {resp['app_info']}\")\n",
-"            const_mdata[\"file-path\"] = None\n",
-"            const_mdata[\"dataset-name\"] = None\n",
-"            const_mdata[\"creation-time\"] = None \n",
+"            for cname in calibrations.values():\n",
+"                const_mdata[cname] = dict()\n",
+"                const_mdata[cname][\"file-path\"] = None\n",
+"                const_mdata[cname][\"dataset-name\"] = None\n",
+"                const_mdata[cname][\"creation-time\"] = None \n",
 "            continue\n",
 "\n",
 "        for ccv in resp[\"data\"]:\n",
 "            cc = ccv['calibration_constant']\n",
-"            calibration_name = calibrations[cc['calibration_id']]\n",
-"            const_mdata[\"file-path\"] = str(Path(ccv['path_to_file']) / ccv['file_name'])\n",
-"            const_mdata[\"dataset-name\"] = ccv['data_set_name']\n",
-"            const_mdata[\"creation-time\"] = ccv['begin_at']\n",
+"            cname = calibrations[cc['calibration_id']]\n",
+"            const_mdata[cname] = dict()\n",
+"            const_mdata[cname][\"file-path\"] = str(Path(ccv['path_to_file']) / ccv['file_name'])\n",
+"            const_mdata[cname][\"dataset-name\"] = ccv['data_set_name']\n",
+"            const_mdata[cname][\"creation-time\"] = ccv['begin_at']\n",
 "            pdu = ccv['physical_detector_unit']['physical_name']\n",
 "\n",
 "            print('.', end='', flush=True)\n",
```
%% Cell type:markdown id: tags:
# LPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
The following notebook provides constants metadata in a YAML file to use while correcting LPD images.
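After this commit, the `retrieved-constants` section of `calibration_metadata.yml` nests each constant under a per-aggregator `constants` mapping, which is what the corrected `ccvs['constants']` lookup in `LPD_Correct_Fast.ipynb` reads back. A sketch of the resulting structure, shown as the equivalent Python mapping with made-up placeholder values:

```python
# Placeholder values for illustration only; the real file is YAML
# written via CalibrationMetadata.save(), one entry per data aggregator.
retrieved_constants_example = {
    "LPD00": {
        "constants": {
            "Offset": {
                "file-path": "/gpfs/exfel/d/cal/caldb_store/...",  # placeholder
                "dataset-name": "...",  # placeholder
                "creation-time": "2022-01-01T00:00:00+00:00",  # placeholder
            },
        },
        "physical-detector-unit": "...",  # placeholder physical name
    },
}
```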
%% Cell type:code id: tags:
```python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [-1] # Sequences to correct, use [-1] for all
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 10 # runs to process, required

# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
output_source = '' # Output fast data source, empty to use same as input.

# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DBe. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
cal_db_root = '/gpfs/exfel/d/cal/caldb_store'

# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
```
%% Cell type:code id: tags:
```python
from pathlib import Path
from time import perf_counter
import gc
import re

from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
import extra_geom as xg
import pasha as psh

from extra_data.components import LPD1M

from cal_tools.tools import (
    CalibrationMetadata,
    calcat_creation_time,
    get_dir_creation_date,
    save_constant_metadata,
    write_compressed_frames,
)
from cal_tools.files import DataFile
from cal_tools.restful_config import restful_config
```
%% Cell type:code id: tags:
```python
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$')  # This should probably move to cal_tools

run_folder = Path(in_folder) / f'r{run:04d}'
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)

output_source = output_source or input_source

cal_db_root = Path(cal_db_root)

metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})

creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')

# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
    if not modules or modules == [-1]:
        modules = list(range(16))

    karabo_da = [f'LPD{i:02d}' for i in modules]

# Pick all sequences or those selected.
if not sequences or sequences == [-1]:
    do_sequence = lambda seq: True
else:
    do_sequence = [int(x) for x in sequences].__contains__

# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
```
%% Cell type:code id: tags:
```python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
    base_api_url=calcat_config['base-api-url'],
    use_oauth2=calcat_config['use-oauth2'],
    client_id=calcat_config['user-id'],
    client_secret=calcat_config['user-secret'],
    user_email=calcat_config['user-email'],
    token_url=calcat_config['token-url'],
    refresh_url=calcat_config['refresh-url'],
    auth_url=calcat_config['auth-url'],
    scope='')
```
%% Cell type:code id: tags:
```python
dark_calibrations = {
    1: 'Offset',
    14: 'BadPixelsDark',
}

dark_condition = [
    dict(parameter_id=1, value=bias_voltage),  # Sensor bias voltage
    dict(parameter_id=7, value=mem_cells),  # Memory cells
    dict(parameter_id=15, value=capacitor),  # Feedback capacitor
    dict(parameter_id=13, value=256),  # Pixels X
    dict(parameter_id=14, value=256),  # Pixels Y
]

illuminated_calibrations = {
    20: 'BadPixelsFF',
    42: 'GainAmpMap',
    43: 'FFMap',
    44: 'RelativeGain',
}

illuminated_condition = dark_condition.copy()
illuminated_condition += [
    dict(parameter_id=3, value=photon_energy),  # Source energy
    dict(parameter_id=25, value=category)  # category
]

const_data = {}
const_load_mp = psh.ProcessContext(num_workers=24)

print('Querying calibration database', end='', flush=True)
start = perf_counter()
for k_da in karabo_da:
    pdu = None
    if k_da in retrieved_constants:
        print(f"Constant for {k_da} already in {metadata.filename}, won't query again.")  # noqa
        continue
    retrieved_constants[k_da] = dict()
    const_mdata = retrieved_constants[k_da]["constants"] = dict()

    for calibrations, condition in [
        (dark_calibrations, dark_condition),
        (illuminated_calibrations, illuminated_condition)
    ]:
        resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
            client, karabo_id, list(calibrations.keys()),
            {'parameters_conditions_attributes': condition},
            karabo_da=k_da, event_at=creation_time.isoformat(), snapshot_at=None)

        if not resp["success"]:
            print(f"ERROR: Constants {list(calibrations.values())} "
                  f"were not retrieved, {resp['app_info']}")
            for cname in calibrations.values():
                const_mdata[cname] = dict()
                const_mdata[cname]["file-path"] = None
                const_mdata[cname]["dataset-name"] = None
                const_mdata[cname]["creation-time"] = None
            continue

        for ccv in resp["data"]:
            cc = ccv['calibration_constant']
            cname = calibrations[cc['calibration_id']]
            const_mdata[cname] = dict()
            const_mdata[cname]["file-path"] = str(Path(ccv['path_to_file']) / ccv['file_name'])
            const_mdata[cname]["dataset-name"] = ccv['data_set_name']
            const_mdata[cname]["creation-time"] = ccv['begin_at']
            pdu = ccv['physical_detector_unit']['physical_name']

        print('.', end='', flush=True)
    retrieved_constants[k_da]["physical-detector-unit"] = pdu
metadata.save()

total_time = perf_counter() - start
print(f'{total_time:.1f}s')
print(f"Stored retrieved constants in {metadata.filename}")
```