feat: Add AsymptoticCalculator (#750)

* Add an 'infer.calculators' API giving an asymptotic calculator and asymptotic test stat distribution * Move calculator functions from 'infer.utils' to 'infer.calculators'
scikit-hep · Mar 4, 2020 · 73da08d · 73da08d
1 parent e3f0eb3
commit 73da08d
Show file tree

Hide file tree

Showing 6 changed files with 208 additions and 123 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -122,9 +122,9 @@ Inference
    mle.twice_nll
    mle.fit
    mle.fixed_poi_fit
-   utils.generate_asimov_data
-   utils.pvals_from_teststat
-   utils.pvals_from_teststat_expected
+   calculators.generate_asimov_data
+   calculators.AsymptoticTestStatDistribution
+   calculators.AsymptoticCalculator
 
 Exceptions
 ----------

diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
@@ -1,12 +1,8 @@
 """Inference for Statistical Models."""
 
 from .test_statistics import qmu
-from .utils import (
-    generate_asimov_data,
-    pvals_from_teststat,
-    pvals_from_teststat_expected,
-)
 from .. import get_backend
+from .calculators import AsymptoticCalculator
 
 
 def hypotest(
@@ -78,30 +74,39 @@ def hypotest(
     par_bounds = par_bounds or pdf.config.suggested_bounds()
     tensorlib, _ = get_backend()
 
-    asimov_mu = 0.0
-    asimov_data = generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds)
+    calc = AsymptoticCalculator(data, pdf, init_pars, par_bounds, qtilde=qtilde)
+    teststat = calc.teststatistic(poi_test)
+    sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)
 
-    qmu_v = qmu(poi_test, data, pdf, init_pars, par_bounds)
-    sqrtqmu_v = tensorlib.sqrt(qmu_v)
-
-    qmuA_v = qmu(poi_test, asimov_data, pdf, init_pars, par_bounds)
-    sqrtqmuA_v = tensorlib.sqrt(qmuA_v)
-
-    CLsb, CLb, CLs = pvals_from_teststat(sqrtqmu_v, sqrtqmuA_v, qtilde=qtilde)
+    CLsb = sig_plus_bkg_distribution.pvalue(teststat)
+    CLb = b_only_distribution.pvalue(teststat)
+    CLs = CLsb / CLb
+    CLsb, CLb, CLs = (
+        tensorlib.reshape(CLsb, (1,)),
+        tensorlib.reshape(CLb, (1,)),
+        tensorlib.reshape(CLs, (1,)),
+    )
 
     _returns = [CLs]
     if kwargs.get('return_tail_probs'):
         _returns.append([CLsb, CLb])
     if kwargs.get('return_expected_set'):
         CLs_exp = []
-        for n_sigma in [-2, -1, 0, 1, 2]:
-            CLs_exp.append(pvals_from_teststat_expected(sqrtqmuA_v, nsigma=n_sigma)[-1])
+        for n_sigma in [2, 1, 0, -1, -2]:
+            CLs = sig_plus_bkg_distribution.pvalue(
+                n_sigma
+            ) / b_only_distribution.pvalue(n_sigma)
+            CLs_exp.append(tensorlib.reshape(CLs, (1,)))
         CLs_exp = tensorlib.astensor(CLs_exp)
         if kwargs.get('return_expected'):
             _returns.append(CLs_exp[2])
         _returns.append(CLs_exp)
     elif kwargs.get('return_expected'):
-        _returns.append(pvals_from_teststat_expected(sqrtqmuA_v)[-1])
+        n_sigma = 0
+        CLs = sig_plus_bkg_distribution.pvalue(n_sigma) / b_only_distribution.pvalue(
+            n_sigma
+        )
+        _returns.append(tensorlib.reshape(CLs, (1,)))
     # Enforce a consistent return type of the observed CLs
     return tuple(_returns) if len(_returns) > 1 else _returns[0]
 

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
@@ -0,0 +1,170 @@
+"""
+Calculators for Hypothesis Testing.
+
+The role of the calculators is to compute test statistic and
+provide distributions of said test statistic under various
+hypotheses.
+
+Using the calculators hypothesis tests can then be performed.
+"""
+from .mle import fixed_poi_fit
+from .. import get_backend
+from .test_statistics import qmu
+
+
+def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds):
+    """
+    Compute Asimov Dataset (expected yields at best-fit values) for a given POI value.
+
+    Args:
+        asimov_mu (`float`): The value for the parameter of interest to be used.
+        data (`tensor`): The observed data.
+        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
+        init_pars (`tensor`): The initial parameter values to be used for fitting.
+        par_bounds (`tensor`): The parameter value bounds to be used for fitting.
+
+    Returns:
+        Tensor: The Asimov dataset.
+
+    """
+    bestfit_nuisance_asimov = fixed_poi_fit(asimov_mu, data, pdf, init_pars, par_bounds)
+    return pdf.expected_data(bestfit_nuisance_asimov)
+
+
+class AsymptoticTestStatDistribution(object):
+    """
+    The distribution the test statistic in the asymptotic case.
+
+    Note: These distributions are in :math:`-\hat{\mu}/\sigma` space.
+    In the ROOT implementation the same sigma is assumed for both hypotheses
+    and :math:`p`-values etc are computed in that space.
+    This assumption is necessarily valid, but we keep this for compatibility reasons.
+
+    In the :math:`-\hat{\mu}/\sigma` space, the test statistic (i.e. :math:`\hat{\mu}/\sigma`) is
+    normally distributed with unit variance and its mean at
+    the :math:`-\mu'`, where :math:`\mu'` is the true poi value of the hypothesis.
+    """
+
+    def __init__(self, shift):
+        """
+        Asymptotic test statistic distribution.
+
+        Args:
+            shift (`float`): The displacement of the test statistic distribution.
+
+        Returns:
+            ~pyhf.infer.calculators.AsymptoticTestStatDistribution: The asymptotic distribution of test statistic.
+
+        """
+        self.shift = shift
+        self.sqrtqmuA_v = None
+
+    def pvalue(self, value):
+        """
+        Compute the :math:`p`-value for a given value of the test statistic.
+
+        Args:
+            value (`float`): The test statistic value.
+
+        Returns:
+            Float: The integrated probability to observe a value at least as large as the observed one.
+
+        """
+        tensorlib, _ = get_backend()
+        return 1 - tensorlib.normal_cdf(value - self.shift)
+
+    def expected_value(self, nsigma):
+        """
+        Return the expected value of the test statistic.
+
+        Args:
+            nsigma (`int` or `tensor`): The number of standard deviations.
+
+        Returns:
+            Float: The expected value of the test statistic.
+        """
+        return nsigma
+
+
+class AsymptoticCalculator(object):
+    """The Asymptotic Calculator."""
+
+    def __init__(self, data, pdf, init_pars=None, par_bounds=None, qtilde=False):
+        """
+        Asymptotic Calculator.
+
+        Args:
+            data (`tensor`): The observed data.
+            pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
+            init_pars (`tensor`): The initial parameter values to be used for fitting.
+            par_bounds (`tensor`): The parameter value bounds to be used for fitting.
+            qtilde (`bool`): Whether to use qtilde as the test statistic.
+
+        Returns:
+            ~pyhf.infer.calculators.AsymptoticCalculator: The calculator for asymptotic quantities.
+
+        """
+        self.data = data
+        self.pdf = pdf
+        self.init_pars = init_pars or pdf.config.suggested_init()
+        self.par_bounds = par_bounds or pdf.config.suggested_bounds()
+        self.qtilde = qtilde
+        self.sqrtqmuA_v = None
+
+    def distributions(self, poi_test):
+        """
+        Probability Distributions of the test statistic value under the signal + background and and background-only hypothesis.
+
+        Args:
+            poi_test: The value for the parameter of interest.
+
+        Returns:
+            Tuple (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distributions under the hypotheses.
+
+        """
+        if self.sqrtqmuA_v is None:
+            raise RuntimeError('need to call .teststatistic(poi_test) first')
+        sb_dist = AsymptoticTestStatDistribution(-self.sqrtqmuA_v)
+        b_dist = AsymptoticTestStatDistribution(0.0)
+        return sb_dist, b_dist
+
+    def teststatistic(self, poi_test):
+        """
+        Compute the test statistic for the observed data under the studied model.
+
+        Args:
+            poi_test: The value for the parameter of interest.
+
+        Returns:
+            Float: the value of the test statistic.
+
+        """
+        tensorlib, _ = get_backend()
+        qmu_v = qmu(poi_test, self.data, self.pdf, self.init_pars, self.par_bounds)
+        sqrtqmu_v = tensorlib.sqrt(qmu_v)
+
+        asimov_mu = 0.0
+        asimov_data = generate_asimov_data(
+            asimov_mu, self.data, self.pdf, self.init_pars, self.par_bounds
+        )
+        qmuA_v = qmu(poi_test, asimov_data, self.pdf, self.init_pars, self.par_bounds)
+        self.sqrtqmuA_v = tensorlib.sqrt(qmuA_v)
+
+        if not self.qtilde:  # qmu
+            teststat = sqrtqmu_v - self.sqrtqmuA_v
+        else:  # qtilde
+
+            def _true_case():
+                teststat = sqrtqmu_v - self.sqrtqmuA_v
+                return teststat
+
+            def _false_case():
+                qmu = tensorlib.power(sqrtqmu_v, 2)
+                qmu_A = tensorlib.power(self.sqrtqmuA_v, 2)
+                teststat = (qmu - qmu_A) / (2 * self.sqrtqmuA_v)
+                return teststat
+
+            teststat = tensorlib.conditional(
+                (sqrtqmu_v < self.sqrtqmuA_v), _true_case, _false_case
+            )
+        return teststat
diff --git a/src/pyhf/infer/utils.py b/src/pyhf/infer/utils.py
diff --git a/tests/test_infer.py b/tests/test_infer.py
@@ -148,3 +148,12 @@ def logpdf(self, pars, data):
     )
 
     assert np.isclose(cls[0], 0.7267836451638846)
+
+
+@pytest.mark.parametrize("qtilde", [True, False])
+def test_calculator_distributions_without_teststatistic(qtilde):
+    calc = pyhf.infer.AsymptoticCalculator(
+        [0.0], {}, [1.0], [(0.0, 10.0)], qtilde=qtilde
+    )
+    with pytest.raises(RuntimeError):
+        calc.distributions(1.0)
diff --git a/tests/test_validation.py b/tests/test_validation.py
@@ -189,11 +189,11 @@ def expected_result_1bin_normsys(mu=1.0):
     if mu == 1:
         expected_result = {
             "exp": [
-                7.471694618861785e-10,
+                7.471684419037561e-10,
                 5.7411551509088054e-08,
-                3.6898088058290313e-06,
-                0.000169657315363677,
-                0.004392708998183163,
+                3.6898088062731205e-06,
+                0.00016965731538267896,
+                0.004392708998555453,
             ],
             "obs": 0.0006735317023683173,
         }
@@ -701,7 +701,6 @@ def validate_hypotest(pdf, data, mu_test, expected_result, tolerance=1e-6):
         return_expected_set=True,
         qtilde=False,
     )
-
     assert abs(CLs_obs - expected_result['obs']) / expected_result['obs'] < tolerance
     for result, expected in zip(CLs_exp_set, expected_result['exp']):
         assert abs(result - expected) / expected < tolerance