Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add AsymptoticCalculator #750

Merged
merged 25 commits into from
Mar 4, 2020
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ Inference
mle.twice_nll
mle.fit
mle.fixed_poi_fit
utils.generate_asimov_data
utils.pvals_from_teststat
utils.pvals_from_teststat_expected
calculators.generate_asimov_data
calculators.AsymptoticTestStatDistribution
calculators.AsymptoticCalculator

Exceptions
----------
Expand Down
35 changes: 18 additions & 17 deletions src/pyhf/infer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
"""Inference for Statistical Models."""

from .test_statistics import qmu
from .utils import (
generate_asimov_data,
pvals_from_teststat,
pvals_from_teststat_expected,
)
from .. import get_backend
from .calculators import AsymptoticCalculator


def hypotest(
Expand Down Expand Up @@ -78,30 +74,35 @@ def hypotest(
par_bounds = par_bounds or pdf.config.suggested_bounds()
tensorlib, _ = get_backend()

asimov_mu = 0.0
asimov_data = generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds)
calc = AsymptoticCalculator(data, pdf, init_pars, par_bounds, qtilde=qtilde)
kratsg marked this conversation as resolved.
Show resolved Hide resolved
teststat = calc.teststatistic(poi_test)
sb_dist, b_dist = calc.distributions(poi_test)
lukasheinrich marked this conversation as resolved.
Show resolved Hide resolved

qmu_v = qmu(poi_test, data, pdf, init_pars, par_bounds)
sqrtqmu_v = tensorlib.sqrt(qmu_v)

qmuA_v = qmu(poi_test, asimov_data, pdf, init_pars, par_bounds)
sqrtqmuA_v = tensorlib.sqrt(qmuA_v)

CLsb, CLb, CLs = pvals_from_teststat(sqrtqmu_v, sqrtqmuA_v, qtilde=qtilde)
CLsb = sb_dist.pvalue(teststat)
CLb = b_dist.pvalue(teststat)
CLs = CLsb / CLb
CLsb, CLb, CLs = (
tensorlib.reshape(CLsb, (1,)),
tensorlib.reshape(CLb, (1,)),
tensorlib.reshape(CLs, (1,)),
)

_returns = [CLs]
if kwargs.get('return_tail_probs'):
_returns.append([CLsb, CLb])
if kwargs.get('return_expected_set'):
CLs_exp = []
for n_sigma in [-2, -1, 0, 1, 2]:
CLs_exp.append(pvals_from_teststat_expected(sqrtqmuA_v, nsigma=n_sigma)[-1])
for n_sigma in [2, 1, 0, -1, -2]:
CLs = sb_dist.pvalue(n_sigma) / b_dist.pvalue(n_sigma)
CLs_exp.append(tensorlib.reshape(CLs, (1,)))
CLs_exp = tensorlib.astensor(CLs_exp)
if kwargs.get('return_expected'):
_returns.append(CLs_exp[2])
_returns.append(CLs_exp)
elif kwargs.get('return_expected'):
_returns.append(pvals_from_teststat_expected(sqrtqmuA_v)[-1])
n_sigma = 0
CLs = sb_dist.pvalue(n_sigma) / b_dist.pvalue(n_sigma)
_returns.append(tensorlib.reshape(CLs, (1,)))
# Enforce a consistent return type of the observed CLs
return tuple(_returns) if len(_returns) > 1 else _returns[0]

Expand Down
165 changes: 165 additions & 0 deletions src/pyhf/infer/calculators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
"""
Calculators for Hypothesis Testing.

The role of the calculators is to compute test statistic and
provide distributions of said test statistic under various
hypotheses.

Using the calculators hypothesis tests can then be performed.
"""
from .mle import fixed_poi_fit
from .. import get_backend
from .test_statistics import qmu


def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds):
"""
Compute Asimov Dataset (expected yields at best-fit values) for a given POI value.

Args:
asimov_mu (`float`): The value for the parameter of interest to be used.
data (`tensor`): The observed data.
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
init_pars (`tensor`): The initial parameter values to be used for fitting.
par_bounds (`tensor`): The parameter value bounds to be used for fitting.

Returns:
Tensor: The Asimov dataset.

"""
bestfit_nuisance_asimov = fixed_poi_fit(asimov_mu, data, pdf, init_pars, par_bounds)
return pdf.expected_data(bestfit_nuisance_asimov)


class AsymptoticTestStatDistribution(object):
"""
The distribution the test statistic in the asymptotic case.

Note: These distributions are in :math:`-\hat{\mu}/\sigma` space.
In the ROOT implementation the same sigma is assumed for both hypotheses
and :math:`p`-values etc are computed in that space.
This assumption is necessarily valid, but we keep this for compatibility reasons.

In the :math:`-\hat{\mu}/\sigma` space, the test statistic (i.e. :math:`\hat{\mu}/\sigma`) is
normally distributed with unit variance and its mean at
the :math:`-\mu'`, where :math:`\mu'` is the true poi value of the hypothesis.
"""

def __init__(self, shift):
"""
Asymptotic test statistic distribution.

Args:
shift (`float`): The displacement of the test statistic distribution.

Returns:
~pyhf.infer.calculators.AsymptoticTestStatDistribution: The asymptotic distribution of test statistic.

"""
self.shift = shift

def pvalue(self, value):
"""
Compute the :math:`p`-value for a given value of the test statistic.

Args:
value (`float`): The test statistic value.

Returns:
Float: The integrated probability to observe a value at least as large as the observed one.

"""
tensorlib, _ = get_backend()
return 1 - tensorlib.normal_cdf(value - self.shift)

def expected_value(self, nsigma):
"""
Return the expected value of the test statistic.

Args:
nsigma (`int` or `tensor`): The number of standard deviations.

Returns:
Float: The expected value of the test statistic.
"""
return nsigma


class AsymptoticCalculator(object):
"""The Asymptotic Calculator."""

def __init__(self, data, pdf, init_pars=None, par_bounds=None, qtilde=False):
"""
Asymptotic Calculator.

Args:
data (`tensor`): The observed data.
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
init_pars (`tensor`): The initial parameter values to be used for fitting.
par_bounds (`tensor`): The parameter value bounds to be used for fitting.
kratsg marked this conversation as resolved.
Show resolved Hide resolved

Returns:
~pyhf.infer.calculators.AsymptoticCalculator: The calculator for asymptotic quantities.

"""
self.data = data
self.pdf = pdf
self.init_pars = init_pars or pdf.config.suggested_init()
self.par_bounds = par_bounds or pdf.config.suggested_bounds()
self.qtilde = qtilde

def distributions(self, poi_test):
"""
Probability Distributions of the test statistic value under the signal + background and and background-only hypothesis.

Args:
poi_test: The value for the parameter of interest.

Returns:
Tuple (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distributions under the hypotheses.

"""
sb_dist = AsymptoticTestStatDistribution(-self.sqrtqmuA_v)
kratsg marked this conversation as resolved.
Show resolved Hide resolved
b_dist = AsymptoticTestStatDistribution(0.0)
return sb_dist, b_dist

def teststatistic(self, poi_test):
"""
Compute the test statistic for the observed data under the studied model.

Args:
poi_test: The value for the parameter of interest.

Returns:
Float: the value of the test statistic.

"""
tensorlib, _ = get_backend()
qmu_v = qmu(poi_test, self.data, self.pdf, self.init_pars, self.par_bounds)
sqrtqmu_v = tensorlib.sqrt(qmu_v)

asimov_mu = 0.0
asimov_data = generate_asimov_data(
asimov_mu, self.data, self.pdf, self.init_pars, self.par_bounds
)
qmuA_v = qmu(poi_test, asimov_data, self.pdf, self.init_pars, self.par_bounds)
self.sqrtqmuA_v = tensorlib.sqrt(qmuA_v)

if not self.qtilde: # qmu
teststat = sqrtqmu_v - self.sqrtqmuA_v
else: # qtilde

def _true_case():
teststat = sqrtqmu_v - self.sqrtqmuA_v
return teststat

def _false_case():
qmu = tensorlib.power(sqrtqmu_v, 2)
qmu_A = tensorlib.power(self.sqrtqmuA_v, 2)
teststat = (qmu - qmu_A) / (2 * self.sqrtqmuA_v)
return teststat

teststat = tensorlib.conditional(
(sqrtqmu_v < self.sqrtqmuA_v), _true_case, _false_case
)
return teststat
98 changes: 0 additions & 98 deletions src/pyhf/infer/utils.py

This file was deleted.

9 changes: 4 additions & 5 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,11 @@ def expected_result_1bin_normsys(mu=1.0):
if mu == 1:
expected_result = {
"exp": [
7.471694618861785e-10,
7.471684419037561e-10,
5.7411551509088054e-08,
3.6898088058290313e-06,
0.000169657315363677,
0.004392708998183163,
3.6898088062731205e-06,
0.00016965731538267896,
0.004392708998555453,
],
"obs": 0.0006735317023683173,
}
Expand Down Expand Up @@ -701,7 +701,6 @@ def validate_hypotest(pdf, data, mu_test, expected_result, tolerance=1e-6):
return_expected_set=True,
qtilde=False,
)

assert abs(CLs_obs - expected_result['obs']) / expected_result['obs'] < tolerance
for result, expected in zip(CLs_exp_set, expected_result['exp']):
assert abs(result - expected) / expected < tolerance
Expand Down