# and the only regularization is to prevent the weights from becoming > 18 + 3 sqrt(var) ~= 50, making this a very loose regularization.
# and the only regularization is to prevent the weights from becoming > 18 + 3 sqrt(var) ~= 50, making this a very loose regularization.
# An alternative would be to set the (alpha, beta) both to very low values, which makes the hyper prior become closer to the non-informative Jeffrey's prior.
# An alternative would be to set the (alpha, beta) both to very low values, which makes the hyper prior become closer to the non-informative Jeffrey's prior.
# Using this alternative (ie: (0.1, 0.1) for the weights' hyper prior) leads to very large lambda and numerical issues with the fit.
# Using this alternative (ie: (0.1, 0.1) for the weights' hyper prior) leads to very large lambda and numerical issues with the fit.
self.alpha_lambda=3.0
self.alpha_lambda=0.1
self.beta_lambda=6.0
self.beta_lambda=0.1
# Hyperprior choice on the likelihood noise level:
# Hyperprior choice on the likelihood noise level:
# The likelihood noise level is controlled by sigma in the likelihood and it should be allowed to be very broad, but different
# The likelihood noise level is controlled by sigma in the likelihood and it should be allowed to be very broad, but different
...
@@ -92,8 +92,8 @@ class BNN(nn.Module):
...
@@ -92,8 +92,8 @@ class BNN(nn.Module):
# Making both alpha and beta small makes the gamma distribution closer to the Jeffrey's prior, which makes it non-informative
# Making both alpha and beta small makes the gamma distribution closer to the Jeffrey's prior, which makes it non-informative
# This seems to lead to a larger training time, though.
# This seems to lead to a larger training time, though.
# Since, after standardization, we know to expect the variance to be of order (1), we can also select alpha and beta leading to high variance in this range
# Since, after standardization, we know to expect the variance to be of order (1), we can also select alpha and beta leading to high variance in this range
self.alpha_sigma=2.0
self.alpha_sigma=0.1
self.beta_sigma=0.15
self.beta_sigma=0.1
self.model=nn.Sequential(
self.model=nn.Sequential(
bnn.BayesLinear(prior_mu=0.0,
bnn.BayesLinear(prior_mu=0.0,
...
@@ -201,7 +201,7 @@ class BNNModel(RegressorMixin, BaseEstimator):
...
@@ -201,7 +201,7 @@ class BNNModel(RegressorMixin, BaseEstimator):
self.model=BNN(X.shape[1],y.shape[1])
self.model=BNN(X.shape[1],y.shape[1])
# prepare data loader
# prepare data loader
B=5
B=100
loader=DataLoader(ds,
loader=DataLoader(ds,
batch_size=B,
batch_size=B,
num_workers=5,
num_workers=5,
...
@@ -223,7 +223,7 @@ class BNNModel(RegressorMixin, BaseEstimator):
...
@@ -223,7 +223,7 @@ class BNNModel(RegressorMixin, BaseEstimator):
parser.add_argument('-w','--weight',action="store_true",default=False,help='Whether to reweight data as a function of the pulse energy to make it invariant to that.')
parser.add_argument('-w','--weight',action="store_true",default=True,help='Whether to reweight data as a function of the pulse energy to make it invariant to that.')