Skip to content

Commit

Permalink
feat: Add AsymptoticCalculator (#750)
Browse files Browse the repository at this point in the history
* Add an 'infer.calculators' API giving an asymptotic calculator and asymptotic test stat distribution
* Move calculator functions from 'infer.utils' to 'infer.calculators'
  • Loading branch information
kratsg authored Mar 4, 2020
1 parent e3f0eb3 commit 73da08d
Show file tree
Hide file tree
Showing 6 changed files with 208 additions and 123 deletions.
6 changes: 3 additions & 3 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ Inference
mle.twice_nll
mle.fit
mle.fixed_poi_fit
utils.generate_asimov_data
utils.pvals_from_teststat
utils.pvals_from_teststat_expected
calculators.generate_asimov_data
calculators.AsymptoticTestStatDistribution
calculators.AsymptoticCalculator

Exceptions
----------
Expand Down
39 changes: 22 additions & 17 deletions src/pyhf/infer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
"""Inference for Statistical Models."""

from .test_statistics import qmu
from .utils import (
generate_asimov_data,
pvals_from_teststat,
pvals_from_teststat_expected,
)
from .. import get_backend
from .calculators import AsymptoticCalculator


def hypotest(
Expand Down Expand Up @@ -78,30 +74,39 @@ def hypotest(
par_bounds = par_bounds or pdf.config.suggested_bounds()
tensorlib, _ = get_backend()

asimov_mu = 0.0
asimov_data = generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds)
calc = AsymptoticCalculator(data, pdf, init_pars, par_bounds, qtilde=qtilde)
teststat = calc.teststatistic(poi_test)
sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)

qmu_v = qmu(poi_test, data, pdf, init_pars, par_bounds)
sqrtqmu_v = tensorlib.sqrt(qmu_v)

qmuA_v = qmu(poi_test, asimov_data, pdf, init_pars, par_bounds)
sqrtqmuA_v = tensorlib.sqrt(qmuA_v)

CLsb, CLb, CLs = pvals_from_teststat(sqrtqmu_v, sqrtqmuA_v, qtilde=qtilde)
CLsb = sig_plus_bkg_distribution.pvalue(teststat)
CLb = b_only_distribution.pvalue(teststat)
CLs = CLsb / CLb
CLsb, CLb, CLs = (
tensorlib.reshape(CLsb, (1,)),
tensorlib.reshape(CLb, (1,)),
tensorlib.reshape(CLs, (1,)),
)

_returns = [CLs]
if kwargs.get('return_tail_probs'):
_returns.append([CLsb, CLb])
if kwargs.get('return_expected_set'):
CLs_exp = []
for n_sigma in [-2, -1, 0, 1, 2]:
CLs_exp.append(pvals_from_teststat_expected(sqrtqmuA_v, nsigma=n_sigma)[-1])
for n_sigma in [2, 1, 0, -1, -2]:
CLs = sig_plus_bkg_distribution.pvalue(
n_sigma
) / b_only_distribution.pvalue(n_sigma)
CLs_exp.append(tensorlib.reshape(CLs, (1,)))
CLs_exp = tensorlib.astensor(CLs_exp)
if kwargs.get('return_expected'):
_returns.append(CLs_exp[2])
_returns.append(CLs_exp)
elif kwargs.get('return_expected'):
_returns.append(pvals_from_teststat_expected(sqrtqmuA_v)[-1])
n_sigma = 0
CLs = sig_plus_bkg_distribution.pvalue(n_sigma) / b_only_distribution.pvalue(
n_sigma
)
_returns.append(tensorlib.reshape(CLs, (1,)))
# Enforce a consistent return type of the observed CLs
return tuple(_returns) if len(_returns) > 1 else _returns[0]

Expand Down
170 changes: 170 additions & 0 deletions src/pyhf/infer/calculators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""
Calculators for Hypothesis Testing.
The role of the calculators is to compute test statistic and
provide distributions of said test statistic under various
hypotheses.
Using the calculators hypothesis tests can then be performed.
"""
from .mle import fixed_poi_fit
from .. import get_backend
from .test_statistics import qmu


def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds):
"""
Compute Asimov Dataset (expected yields at best-fit values) for a given POI value.
Args:
asimov_mu (`float`): The value for the parameter of interest to be used.
data (`tensor`): The observed data.
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
init_pars (`tensor`): The initial parameter values to be used for fitting.
par_bounds (`tensor`): The parameter value bounds to be used for fitting.
Returns:
Tensor: The Asimov dataset.
"""
bestfit_nuisance_asimov = fixed_poi_fit(asimov_mu, data, pdf, init_pars, par_bounds)
return pdf.expected_data(bestfit_nuisance_asimov)


class AsymptoticTestStatDistribution(object):
"""
The distribution the test statistic in the asymptotic case.
Note: These distributions are in :math:`-\hat{\mu}/\sigma` space.
In the ROOT implementation the same sigma is assumed for both hypotheses
and :math:`p`-values etc are computed in that space.
This assumption is necessarily valid, but we keep this for compatibility reasons.
In the :math:`-\hat{\mu}/\sigma` space, the test statistic (i.e. :math:`\hat{\mu}/\sigma`) is
normally distributed with unit variance and its mean at
the :math:`-\mu'`, where :math:`\mu'` is the true poi value of the hypothesis.
"""

def __init__(self, shift):
"""
Asymptotic test statistic distribution.
Args:
shift (`float`): The displacement of the test statistic distribution.
Returns:
~pyhf.infer.calculators.AsymptoticTestStatDistribution: The asymptotic distribution of test statistic.
"""
self.shift = shift
self.sqrtqmuA_v = None

def pvalue(self, value):
"""
Compute the :math:`p`-value for a given value of the test statistic.
Args:
value (`float`): The test statistic value.
Returns:
Float: The integrated probability to observe a value at least as large as the observed one.
"""
tensorlib, _ = get_backend()
return 1 - tensorlib.normal_cdf(value - self.shift)

def expected_value(self, nsigma):
"""
Return the expected value of the test statistic.
Args:
nsigma (`int` or `tensor`): The number of standard deviations.
Returns:
Float: The expected value of the test statistic.
"""
return nsigma


class AsymptoticCalculator(object):
"""The Asymptotic Calculator."""

def __init__(self, data, pdf, init_pars=None, par_bounds=None, qtilde=False):
"""
Asymptotic Calculator.
Args:
data (`tensor`): The observed data.
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
init_pars (`tensor`): The initial parameter values to be used for fitting.
par_bounds (`tensor`): The parameter value bounds to be used for fitting.
qtilde (`bool`): Whether to use qtilde as the test statistic.
Returns:
~pyhf.infer.calculators.AsymptoticCalculator: The calculator for asymptotic quantities.
"""
self.data = data
self.pdf = pdf
self.init_pars = init_pars or pdf.config.suggested_init()
self.par_bounds = par_bounds or pdf.config.suggested_bounds()
self.qtilde = qtilde
self.sqrtqmuA_v = None

def distributions(self, poi_test):
"""
Probability Distributions of the test statistic value under the signal + background and and background-only hypothesis.
Args:
poi_test: The value for the parameter of interest.
Returns:
Tuple (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distributions under the hypotheses.
"""
if self.sqrtqmuA_v is None:
raise RuntimeError('need to call .teststatistic(poi_test) first')
sb_dist = AsymptoticTestStatDistribution(-self.sqrtqmuA_v)
b_dist = AsymptoticTestStatDistribution(0.0)
return sb_dist, b_dist

def teststatistic(self, poi_test):
"""
Compute the test statistic for the observed data under the studied model.
Args:
poi_test: The value for the parameter of interest.
Returns:
Float: the value of the test statistic.
"""
tensorlib, _ = get_backend()
qmu_v = qmu(poi_test, self.data, self.pdf, self.init_pars, self.par_bounds)
sqrtqmu_v = tensorlib.sqrt(qmu_v)

asimov_mu = 0.0
asimov_data = generate_asimov_data(
asimov_mu, self.data, self.pdf, self.init_pars, self.par_bounds
)
qmuA_v = qmu(poi_test, asimov_data, self.pdf, self.init_pars, self.par_bounds)
self.sqrtqmuA_v = tensorlib.sqrt(qmuA_v)

if not self.qtilde: # qmu
teststat = sqrtqmu_v - self.sqrtqmuA_v
else: # qtilde

def _true_case():
teststat = sqrtqmu_v - self.sqrtqmuA_v
return teststat

def _false_case():
qmu = tensorlib.power(sqrtqmu_v, 2)
qmu_A = tensorlib.power(self.sqrtqmuA_v, 2)
teststat = (qmu - qmu_A) / (2 * self.sqrtqmuA_v)
return teststat

teststat = tensorlib.conditional(
(sqrtqmu_v < self.sqrtqmuA_v), _true_case, _false_case
)
return teststat
98 changes: 0 additions & 98 deletions src/pyhf/infer/utils.py

This file was deleted.

9 changes: 9 additions & 0 deletions tests/test_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,12 @@ def logpdf(self, pars, data):
)

assert np.isclose(cls[0], 0.7267836451638846)


@pytest.mark.parametrize("qtilde", [True, False])
def test_calculator_distributions_without_teststatistic(qtilde):
calc = pyhf.infer.AsymptoticCalculator(
[0.0], {}, [1.0], [(0.0, 10.0)], qtilde=qtilde
)
with pytest.raises(RuntimeError):
calc.distributions(1.0)
9 changes: 4 additions & 5 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,11 @@ def expected_result_1bin_normsys(mu=1.0):
if mu == 1:
expected_result = {
"exp": [
7.471694618861785e-10,
7.471684419037561e-10,
5.7411551509088054e-08,
3.6898088058290313e-06,
0.000169657315363677,
0.004392708998183163,
3.6898088062731205e-06,
0.00016965731538267896,
0.004392708998555453,
],
"obs": 0.0006735317023683173,
}
Expand Down Expand Up @@ -701,7 +701,6 @@ def validate_hypotest(pdf, data, mu_test, expected_result, tolerance=1e-6):
return_expected_set=True,
qtilde=False,
)

assert abs(CLs_obs - expected_result['obs']) / expected_result['obs'] < tolerance
for result, expected in zip(CLs_exp_set, expected_result['exp']):
assert abs(result - expected) / expected < tolerance
Expand Down

0 comments on commit 73da08d

Please sign in to comment.