Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
pes_to_spec
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Machine Learning projects.
pes_to_spec
Commits
ff1a99d6
Commit
ff1a99d6
authored
2 years ago
by
Danilo Ferreira de Lima
Browse files
Options
Downloads
Patches
Plain Diff
Switched to IncrementalPCA.
parent
201a1568
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
pes_to_spec/model.py
+20
-10
20 additions, 10 deletions
pes_to_spec/model.py
scripts/test_analysis.py
+40
-4
40 additions, 4 deletions
scripts/test_analysis.py
with
60 additions
and
14 deletions
pes_to_spec/model.py
+
20
−
10
View file @
ff1a99d6
...
...
@@ -4,9 +4,8 @@ from autograd import grad
import
joblib
import
h5py
from
scipy.signal
import
fftconvolve
from
scipy.signal.windows
import
gaussian
as
gaussian_window
from
scipy.optimize
import
fmin_l_bfgs_b
from
sklearn.decomposition
import
PCA
from
sklearn.decomposition
import
PCA
,
IncrementalPCA
from
sklearn.model_selection
import
train_test_split
from
typing
import
Any
,
Dict
,
List
,
Optional
...
...
@@ -25,9 +24,9 @@ class Model(object):
channels: Selected channels to use as an input for the low resolution data.
n_pca_lr: Number of low-resolution data PCA components.
n_pca_hr: Number of high-resolution data PCA components.
high_res_sigma: Resolution of the high-resolution spectrometer.
tof_start: Start looking at this index from the low-resolution spectrometer data.
delta_tof: Number of components to take from the low-resolution spectrometer.
high_res_sigma: Resolution of the high-resolution spectrometer
in electron-Volts
.
tof_start: Start looking at this index from the low-resolution spectrometer data.
Set to None to perform no selection
delta_tof: Number of components to take from the low-resolution spectrometer.
Set to None to perform no selection.
validation_size: Fraction (number between 0 and 1) of the data to take for validation and systematic uncertainty estimate.
"""
...
...
@@ -41,15 +40,15 @@ class Model(object):
n_pca_lr
:
int
=
400
,
n_pca_hr
:
int
=
20
,
high_res_sigma
:
float
=
0.2
,
tof_start
:
int
=
31445
,
delta_tof
:
int
=
200
,
tof_start
:
Optional
[
int
]
=
31445
,
delta_tof
:
Optional
[
int
]
=
200
,
validation_size
:
float
=
0.05
):
self
.
channels
=
channels
self
.
n_pca_lr
=
n_pca_lr
self
.
n_pca_hr
=
n_pca_hr
# PCA models
self
.
lr_pca
=
PCA
(
n_pca_lr
,
whiten
=
True
)
self
.
lr_pca
=
Incremental
PCA
(
n_pca_lr
,
whiten
=
True
,
batch_size
=
n_pca_lr
)
self
.
hr_pca
=
PCA
(
n_pca_hr
,
whiten
=
True
)
# PCA unc. in high resolution
...
...
@@ -64,7 +63,6 @@ class Model(object):
# where to cut on the ToF PES data
self
.
tof_start
=
tof_start
self
.
delta_tof
=
delta_tof
self
.
tof_end
=
self
.
tof_start
+
self
.
delta_tof
# high-resolution photon energy axis
self
.
high_res_photon_energy
:
Optional
[
np
.
ndarray
]
=
None
...
...
@@ -82,7 +80,10 @@ class Model(object):
Returns: Concatenated and pre-processed low-resolution data of shape (train_id, features).
"""
cat
=
np
.
concatenate
([
low_res_data
[
k
][:,
self
.
tof_start
:
self
.
tof_end
]
for
k
in
self
.
channels
],
axis
=
1
)
items
=
[
low_res_data
[
k
]
for
k
in
self
.
channels
]
if
self
.
tof_start
is
not
None
and
self
.
delta_tof
is
not
None
:
items
=
[
item
[:,
self
.
tof_start
:(
self
.
tof_start
+
self
.
delta_tof
)]
for
item
in
items
]
cat
=
np
.
concatenate
(
items
,
axis
=
1
)
return
cat
def
preprocess_high_res
(
self
,
high_res_data
:
np
.
ndarray
,
high_res_photon_energy
:
np
.
ndarray
)
->
np
.
ndarray
:
...
...
@@ -117,19 +118,28 @@ class Model(object):
self
.
high_res_photon_energy
=
high_res_photon_energy
print
(
"
Pre-processing low
"
)
low_res
=
self
.
preprocess_low_res
(
low_res_data
)
print
(
"
Pre-processing high
"
)
high_res
=
self
.
preprocess_high_res
(
high_res_data
,
high_res_photon_energy
)
# fit PCA
print
(
"
PCA low
"
)
low_pca
=
self
.
lr_pca
.
fit_transform
(
low_res
)
print
(
"
PCA high
"
)
high_pca
=
self
.
hr_pca
.
fit_transform
(
high_res
)
print
(
"
Split
"
)
# split in train and test for PCA uncertainty evaluation
low_pca_train
,
low_pca_test
,
high_pca_train
,
high_pca_test
=
train_test_split
(
low_pca
,
high_pca
,
test_size
=
self
.
validation_size
,
random_state
=
42
)
# fit the linear model
print
(
"
Fit
"
)
self
.
fit_model
.
fit
(
low_pca_train
,
high_pca_train
,
low_pca_test
,
high_pca_test
)
print
(
"
PCA unc
"
)
high_pca_rec
=
self
.
hr_pca
.
inverse_transform
(
high_pca
)
self
.
high_pca_unc
=
np
.
sqrt
(
np
.
mean
((
high_res
-
high_pca_rec
)
**
2
,
axis
=
0
,
keepdims
=
True
))
print
(
"
Done
"
)
return
high_res
def
predict
(
self
,
low_res_data
:
Dict
[
str
,
np
.
ndarray
])
->
np
.
ndarray
:
...
...
This diff is collapsed.
Click to expand it.
scripts/test_analysis.py
+
40
−
4
View file @
ff1a99d6
...
...
@@ -17,6 +17,26 @@ from matplotlib.gridspec import GridSpec
from
typing
import
Optional
def
plot_pes
(
filename
:
str
,
pes_raw_int
:
np
.
ndarray
):
"""
Plot low-resolution spectrum.
Args:
filename: Output file name.
pes_raw_int: Low-resolution spectrum.
"""
fig
=
plt
.
figure
(
figsize
=
(
16
,
8
))
gs
=
GridSpec
(
1
,
1
)
ax
=
fig
.
add_subplot
(
gs
[
0
,
0
])
ax
.
plot
(
pes_raw_int
,
c
=
'
b
'
,
lw
=
3
,
label
=
"
Low-resolution measurement
"
)
ax
.
legend
()
ax
.
set
(
title
=
f
""
,
xlabel
=
"
ToF index
"
,
ylabel
=
"
Intensity
"
)
fig
.
savefig
(
filename
)
plt
.
close
(
fig
)
def
plot_result
(
filename
:
str
,
spec_pred
:
np
.
ndarray
,
spec_smooth
:
np
.
ndarray
,
spec_raw_pe
:
np
.
ndarray
,
spec_raw_int
:
Optional
[
np
.
ndarray
]
=
None
):
"""
Plot result with uncertainty band.
...
...
@@ -33,12 +53,12 @@ def plot_result(filename: str, spec_pred: np.ndarray, spec_smooth: np.ndarray, s
gs
=
GridSpec
(
1
,
1
)
ax
=
fig
.
add_subplot
(
gs
[
0
,
0
])
eps
=
np
.
mean
(
spec_pred
[:,
1
])
ax
.
plot
(
spec_raw_pe
,
spec_smooth
,
c
=
'
b
'
,
lw
=
3
,
label
=
"
High
resolution measurement (smoothened)
"
)
ax
.
plot
(
spec_raw_pe
,
spec_pred
[:,
0
],
c
=
'
r
'
,
lw
=
3
,
label
=
"
High
resolution prediction
"
)
ax
.
plot
(
spec_raw_pe
,
spec_smooth
,
c
=
'
b
'
,
lw
=
3
,
label
=
"
High
-
resolution measurement (smoothened)
"
)
ax
.
plot
(
spec_raw_pe
,
spec_pred
[:,
0
],
c
=
'
r
'
,
lw
=
3
,
label
=
"
High
-
resolution prediction
"
)
ax
.
fill_between
(
spec_raw_pe
,
spec_pred
[:,
0
]
-
spec_pred
[:,
1
],
spec_pred
[:,
0
]
+
spec_pred
[:,
1
],
facecolor
=
'
red
'
,
alpha
=
0.6
,
label
=
"
68% unc. (stat.)
"
)
ax
.
fill_between
(
spec_raw_pe
,
spec_pred
[:,
0
]
-
spec_pred
[:,
2
],
spec_pred
[:,
0
]
+
spec_pred
[:,
2
],
facecolor
=
'
magenta
'
,
alpha
=
0.6
,
label
=
"
68% unc. (syst., PCA)
"
)
if
spec_raw_int
is
not
None
:
ax
.
plot
(
spec_raw_pe
,
spec_raw_int
,
c
=
'
b
'
,
lw
=
1
,
ls
=
'
--
'
,
label
=
"
High
resolution measurement
"
)
ax
.
plot
(
spec_raw_pe
,
spec_raw_int
,
c
=
'
b
'
,
lw
=
1
,
ls
=
'
--
'
,
label
=
"
High
-
resolution measurement
"
)
ax
.
legend
()
ax
.
set
(
title
=
f
"
avg(unc) =
{
eps
}
"
,
xlabel
=
"
Photon energy [eV]
"
,
...
...
@@ -81,18 +101,34 @@ def main():
#retvol_raw = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.value"].select_trains(by_id[tids]).ndarray()
#retvol_raw_timestamp = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.timestamp"].select_trains(by_id[tids]).ndarray()
model
=
Model
()
model
=
Model
(
channels
=
[
"
channel_1_D
"
,
"
channel_2_B
"
,
"
channel_3_A
"
,
"
channel_3_B
"
,
"
channel_4_C
"
,
"
channel_4_D
"
],
n_pca_lr
=
400
,
n_pca_hr
=
20
,
high_res_sigma
=
0.2
,
tof_start
=
None
,
delta_tof
=
None
,
validation_size
=
0.05
)
train_idx
=
np
.
isin
(
tids
,
train_tids
)
print
(
"
Fitting
"
)
model
.
fit
({
k
:
v
[
train_idx
,
:]
for
k
,
v
in
pes_raw
.
items
()},
spec_raw_int
[
train_idx
,
:],
spec_raw_pe
[
train_idx
,
:])
spec_smooth
=
model
.
preprocess_high_res
(
spec_raw_int
,
spec_raw_pe
)
# test
print
(
"
Predict
"
)
spec_pred
=
model
.
predict
(
pes_raw
)
# plot
for
tid
in
test_tids
:
idx
=
np
.
where
(
tid
==
tids
)[
0
][
0
]
plot_result
(
f
"
test_
{
tid
}
.png
"
,
spec_pred
[
idx
,
:,
:],
spec_smooth
[
idx
,
:],
spec_raw_pe
[
idx
,
:],
spec_raw_int
[
idx
,
:])
for
ch
in
channels
:
plot_pes
(
f
"
test_pes_
{
tid
}
_
{
ch
}
.png
"
,
pes_raw
[
ch
][
idx
,
:])
if
__name__
==
'
__main__
'
:
main
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment