Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

precomputes all modifications at once #269

Merged
merged 24 commits into from
Sep 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyhf/modifiers/histosys.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .. import get_backend
from ..interpolate import interpolator

@modifier(name='histosys', constrained=True, shared=True)
@modifier(name='histosys', constrained=True, shared=True, op_code = 'addition')
class histosys(object):
"""HistoSys modifier

Expand Down
2 changes: 1 addition & 1 deletion pyhf/modifiers/normfactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from . import modifier

@modifier(name='normfactor', shared=True)
@modifier(name='normfactor', shared=True, op_code = 'multiplication')
class normfactor(object):
def __init__(self, nom_data, modifier_data):
self.n_parameters = 1
Expand Down
2 changes: 1 addition & 1 deletion pyhf/modifiers/normsys.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .. import get_backend
from ..interpolate import interpolator

@modifier(name='normsys', constrained=True, shared=True)
@modifier(name='normsys', constrained=True, shared=True, op_code = 'multiplication')
class normsys(object):
def __init__(self, nom_data, modifier_data):
self.n_parameters = 1
Expand Down
2 changes: 1 addition & 1 deletion pyhf/modifiers/shapefactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from . import modifier

@modifier(name='shapefactor', shared=True)
@modifier(name='shapefactor', shared=True, op_code = 'multiplication')
class shapefactor(object):
def __init__(self, nom_data, modifier_data):
self.n_parameters = len(nom_data)
Expand Down
2 changes: 1 addition & 1 deletion pyhf/modifiers/shapesys.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from . import modifier
from .. import get_backend

@modifier(name='shapesys', constrained=True, pdf_type='poisson')
@modifier(name='shapesys', constrained=True, pdf_type='poisson', op_code = 'multiplication')
class shapesys(object):
def __init__(self, nom_data, modifier_data):
self.n_parameters = len(nom_data)
Expand Down
2 changes: 1 addition & 1 deletion pyhf/modifiers/staterror.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from . import modifier
from .. import get_backend

@modifier(name='staterror', shared=True, constrained=True)
@modifier(name='staterror', shared=True, constrained=True, op_code = 'multiplication')
class staterror(object):
def __init__(self, nom_data, modifier_data):
self.n_parameters = len(nom_data)
Expand Down
198 changes: 165 additions & 33 deletions pyhf/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def from_spec(cls,spec,poiname = 'mu', qualify_names = False):
channels.append(channel['name'])
for sample in channel['samples']:
samples.append(sample['name'])
# we need to bookkeep a list of modifiers by type so that we
# can loop over them on a type-by-type basis
# types like histosys, normsys, etc...
sample['modifiers_by_type'] = {}
for modifier_def in sample['modifiers']:
if qualify_names:
fullname = '{}/{}'.format(modifier_def['type'],modifier_def['name'])
Expand All @@ -30,6 +34,7 @@ def from_spec(cls,spec,poiname = 'mu', qualify_names = False):
modifier = instance.add_or_get_modifier(channel, sample, modifier_def)
modifier.add_sample(channel, sample, modifier_def)
modifiers.append(modifier_def['name'])
sample['modifiers_by_type'].setdefault(modifier_def['type'],[]).append(modifier_def['name'])
kratsg marked this conversation as resolved.
Show resolved Hide resolved
instance.channels = list(set(channels))
kratsg marked this conversation as resolved.
Show resolved Hide resolved
instance.samples = list(set(samples))
instance.modifiers = list(set(modifiers))
Expand Down Expand Up @@ -126,17 +131,132 @@ def __init__(self, spec, **config_kwargs):
# build up our representation of the specification
self.config = _ModelConfig.from_spec(self.spec,**config_kwargs)

def _mtype_results(self,mtype,pars):
"""
This method implements the computation of the modifier's application
for a given modifier type, for each channel and sample within that
type.

In a follow up PR it will be further refactored to reverse the order of
the three loops, such that the outer-most loop is over modifiers (which
is the structure we are aiming for in #251)

This will include additional code like

if mtype in self.combined_mods.keys():
return self.combined_mods[mtype].apply(pars)

before the loops. This returns a bookkeeping dictionary of the following structure

_mtype_results_dict == {
channel1: {
sample1: [
mod1.apply(),
mod2.apply(),
...
],
sample2: [
mod1.apply(),
mod3.apply(),
mod5.apply(),
...
]
},
channel2: {
sample2: [
mod2.apply(),
mod3.apply(),
mod4.apply()
],
...
},
...
}
"""
mtype_results = {}
for channel in self.spec['channels']:
for sample in channel['samples']:
for mname in sample['modifiers_by_type'].get(mtype,[]):
modifier = self.config.modifier(mname)
modpars = pars[self.config.par_slice(mname)]
mtype_results.setdefault(channel['name'],
{}).setdefault(sample['name'],
[]).append(
modifier.apply(channel, sample, modpars)
)
return mtype_results

kratsg marked this conversation as resolved.
Show resolved Hide resolved
def expected_sample(self, channel, sample, pars):
    """
    Public API only, not efficient or fast. We compute all modifications
    for all samples in this method even though we are only interested in
    the modifications in a single sample.

    Alternatively the _all_modifications() could take a list of
    channel/samples for which it should compute modifications.
    """
    # NOTE(fix): the docstring used to sit *after* the get_backend() call,
    # which made it a dead string expression rather than a docstring.
    tensorlib, _ = get_backend()
    all_modifications = self._all_modifications(pars)
    return self._expected_sample(
        tensorlib.astensor(sample['data']),  # nominal
        *all_modifications[channel['name']][sample['name']]  # mods
    )

kratsg marked this conversation as resolved.
Show resolved Hide resolved
def _expected_sample(self, nominal, factors, deltas):
    """
    Combine a sample's nominal histogram with its modifier results.

    Parameters:
        nominal: tensor of the sample's nominal bin yields
        factors: list of multiplicative modifier results (scalars that
            broadcast to all bins, or per-bin vectors)
        deltas: list of additive (bin-wise shift) modifier results

    Returns a tensor of per-bin expected yields:
        product(factors) * (nominal + sum(deltas))
    """
    tensorlib, _ = get_backend()

    # the base value for each bin is either the nominal with deltas applied
    # if there were any, otherwise just the nominal
    if len(deltas):
        # stack all bin-wise shifts in the yield value (delta) from the
        # modifiers on top of each other and sum through the first axis;
        # this gives us an overall shift to apply to the nominal histo
        all_deltas = tensorlib.sum(tensorlib.stack(deltas), axis=0)

        # stack nominal and deltas and sum through the first axis again
        # to arrive at the yield value after deltas (but before factor mods)
        nominal_and_deltas = tensorlib.stack([nominal, all_deltas])
        nominal_plus_deltas = tensorlib.sum(nominal_and_deltas, axis=0)
        basefactor = [nominal_plus_deltas]
    else:
        basefactor = [nominal]

    # NOTE(fix): build a new list instead of ``factors += basefactor`` —
    # augmented assignment extends the caller's list in place, so the
    # factors list held in the _all_modifications() bookkeeping dict would
    # silently accumulate the base histogram on every call.
    all_factors = factors + basefactor

    # multiplicative modifiers are either a single float that should be
    # broadcast to all bins or a list of floats (one per histogram bin)
    binwise_factors = tensorlib.simple_broadcast(*all_factors)

    # arrange all factors on top of each other so that for each bin we
    # have all multiplicative factors
    stacked_factors_binwise = tensorlib.stack(binwise_factors)

    # bin-wise multiply all multiplicative factors such that we arrive
    # at a single number for each bin
    total_factors = tensorlib.product(stacked_factors_binwise, axis=0)
    return total_factors

def _all_modifications(self, pars):
"""
This function implements the calculation of all modifications by
looping over all possible modifications and calling _mtype_results()
for each one. The results are accumulated in a nested dict-like
structure to keep track of factors/deltas that is then used by
expected_actualdata()/expected_sample().

The idea is that we compute all bin-values at once.. each bin is a
product of various factors, but sum are per-channel the other
per-channel

b1 = shapesys_1 | shapef_1 |
b2 = shapesys_2 | shapef_2 |
... normfac1 .. normfac2
... (broad) .. (broad)
bn = shapesys_n | shapef_1 |

this can be achieved by `numpy`'s `broadcast_arrays` and `np.product`. The broadcast expands the scalars or one-length arrays to an array which we can then uniformly multiply
this can be achieved by `numpy`'s `broadcast_arrays` and `np.product`.
The broadcast expands the scalars or one-length arrays to an array
which we can then uniformly multiply

>>> import numpy as np
>>> np.broadcast_arrays([2],[3,4,5],[6],[7,8,9])
Expand Down Expand Up @@ -166,7 +286,6 @@ def expected_sample(self, channel, sample, pars):
Non-shape === affects all bins the same way (just changes normalization, keeps shape the same)

.. _`CERN-OPEN-2012-016`: https://cds.cern.ch/record/1456844?ln=en

"""
# for each sample the expected counts are
# counts = (multiplicative factors) * (normsys multiplier) * (histsys delta + nominal hist)
Expand All @@ -181,36 +300,40 @@ def expected_sample(self, channel, sample, pars):
# \phi == normfactor, overallsys
# \sigma == histosysdelta + nominal

tensorlib, _ = get_backend()
# first, collect the factors from all modifiers
results = {'shapesys': [],
'normfactor': [],
'shapefactor': [],
'staterror': [],
'histosys': [],
'normsys': []}
for m in sample['modifiers']:
modifier, modpars = self.config.modifier(m['name']), pars[self.config.par_slice(m['name'])]
results[m['type']].append(modifier.apply(channel, sample, modpars))


# start building the entire set of factors
factors = []
# scalars that get broadcasted to shape of vectors
factors += results['normfactor']
factors += results['normsys']
# vectors
factors += results['shapesys']
factors += results['shapefactor']
factors += results['staterror']

nominal = tensorlib.astensor(sample['data'])
factors += [tensorlib.sum(tensorlib.stack([
nominal,
tensorlib.sum(tensorlib.stack(results['histosys']), axis=0)
]), axis=0) if len(results['histosys']) > 0 else nominal]

return tensorlib.product(tensorlib.stack(tensorlib.simple_broadcast(*factors)), axis=0)

all_modifications = {}
mods_and_ops = [(x,getattr(modifiers,x).op_code) for x in modifiers.__all__]
factor_mods = [x[0] for x in mods_and_ops if x[1]=='multiplication']
delta_mods = [x[0] for x in mods_and_ops if x[1]=='addition']

all_results = {}
for mtype in factor_mods + delta_mods:
all_results[mtype] = self._mtype_results(mtype,pars)

for channel in self.spec['channels']:
for sample in channel['samples']:
#concatenate list of lists using sum() and an initial value of []
#to get a list of all prefactors that should be multiplied to the
#base histogram (the deltas below + the nominal)
factors = sum([
all_results.get(x,{}).get(channel['name'],{}).get(sample['name'],[])
for x in factor_mods
],[])

#concatenate list of lists using sum() and an initial value of []
#to get a list of all deltas that should be added to the
#nominal values
deltas = sum([
all_results.get(x,{}).get(channel['name'],{}).get(sample['name'],[])
for x in delta_mods
],[])

all_modifications.setdefault(
channel['name'],{})[
sample['name']
] = (factors, deltas)
return all_modifications
kratsg marked this conversation as resolved.
Show resolved Hide resolved

def expected_auxdata(self, pars):
# probably more correctly this should be the expectation value of the constraint_pdf
Expand All @@ -231,8 +354,17 @@ def expected_actualdata(self, pars):
tensorlib, _ = get_backend()
pars = tensorlib.astensor(pars)
data = []

all_modifications = self._all_modifications(pars)
for channel in self.spec['channels']:
data.append(tensorlib.sum(tensorlib.stack([self.expected_sample(channel, sample, pars) for sample in channel['samples']]),axis=0))
sample_stack = [
self._expected_sample(
tensorlib.astensor(sample['data']), #nominal
*all_modifications[channel['name']][sample['name']] #mods
)
for sample in channel['samples']
]
data.append(tensorlib.sum(tensorlib.stack(sample_stack),axis=0))
kratsg marked this conversation as resolved.
Show resolved Hide resolved
return tensorlib.concatenate(data)

def expected_data(self, pars, include_auxdata=True):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_import_default_modifiers(test_modifierPair):
assert hasattr(modifier, 'pdf_type')
assert hasattr(modifier, 'op_code')
assert modifier.pdf_type == test_mod_type
assert modifier.op_code == 'addition'
assert modifier.op_code in ['addition','multiplication']

# we make sure modifiers have right structure
def test_modifiers_structure():
Expand Down