Skip to content

Commit

Permalink
add support for shapesys import from xml (#402)
Browse files Browse the repository at this point in the history
* add support for shapesys import from xml
  • Loading branch information
kratsg authored and lukasheinrich committed Feb 27, 2019
1 parent 0b9dd9d commit 4fa45dd
Show file tree
Hide file tree
Showing 6 changed files with 281 additions and 0 deletions.
21 changes: 21 additions & 0 deletions pyhf/readxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,27 @@ def process_sample(
'data': staterr,
}
)
elif modtag.tag == 'ShapeSys':
# NB: ConstraintType is ignored
if modtag.attrib.get('ConstraintType', 'Poisson') != 'Poisson':
log.warning(
'shapesys modifier %s has a non-poisson constraint',
modtag.attrib['Name'],
)
shapesys_data, _ = import_root_histogram(
rootdir,
modtag.attrib.get('InputFile', inputfile),
modtag.attrib.get('HistoPath', ''),
modtag.attrib['HistoName'],
)
# NB: we convert relative uncertainty to absolute uncertainty
modifiers.append(
{
'name': modtag.attrib['Name'],
'type': 'shapesys',
'data': [a * b for a, b in zip(data, shapesys_data)],
}
)
else:
log.warning('not considering modifier tag %s', modtag)

Expand Down
35 changes: 35 additions & 0 deletions tests/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import uproot
import os
import pytest


def assert_equal_dictionary(d1, d2):
Expand Down Expand Up @@ -156,3 +157,37 @@ def test_import_filecache(mocker):
)

assert_equal_dictionary(parsed_xml, parsed_xml2)


def test_import_shapesys():
    """Check that a ShapeSys element imported from XML yields a 'shapesys' modifier.

    The example workspace is parsed, a model is built from the parsed
    channels and the first measurement's parameters, and the second sample
    of 'channel1' is inspected: its modifiers must be 'lumi' followed by
    'shapesys', and the shapesys data must hold absolute (not relative)
    uncertainties.
    """
    xml_spec = pyhf.readxml.parse(
        'validation/xmlimport_input3/config/examples/example_ShapeSys.xml',
        'validation/xmlimport_input3',
    )

    # Building the model from the spec strictly checks the properties included.
    pdf = pyhf.Model(
        {
            'channels': xml_spec['channels'],
            'parameters': xml_spec['toplvl']['measurements'][0]['config'][
                'parameters'
            ],
        },
        poiname='SigXsecOverSM',
    )

    # Observed bin values for every channel followed by the auxiliary data.
    # Not asserted on below, but assembling it exercises the lookups into the
    # parsed data and the model configuration.
    observed = []
    for channel in pdf.spec['channels']:
        observed.extend(xml_spec['data'][channel['name']])
    data = observed + pdf.config.auxdata

    # Index channels and their sample names by channel name.
    channels = {}
    samples = {}
    for channel in pdf.spec['channels']:
        channels[channel['name']] = channel
        samples[channel['name']] = [entry['name'] for entry in channel['samples']]

    second_sample = channels['channel1']['samples'][1]
    modifiers = second_sample['modifiers']

    assert modifiers[0]['type'] == 'lumi'
    assert modifiers[1]['type'] == 'shapesys'
    # NB: assert that relative uncertainty is converted to absolute uncertainty for shapesys
    assert second_sample['data'] == pytest.approx([100.0, 1.0e-4])
    assert modifiers[1]['data'] == pytest.approx([10.0, 1.5e-5])
160 changes: 160 additions & 0 deletions validation/xmlimport_input3/config/examples/HistFactorySchema.dtd
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@

<!-- The top level combination spec -->
<!-- OutputFilePrefix: Prefix to the output root file to be created (inspection histograms) -->
<!-- Mode: Type of the analysis -->
<!ELEMENT Combination (Function*,Input+,Measurement*)>
<!ATTLIST Combination
OutputFilePrefix CDATA #REQUIRED
Mode CDATA #IMPLIED
>

<!-- A named function, defined by an expression and the names it depends on -->
<!ELEMENT Function EMPTY>
<!ATTLIST Function
Name CDATA #REQUIRED
Expression CDATA #REQUIRED
Dependents CDATA #REQUIRED
>

<!-- Input files detailing the channels. One channel per file -->
<!ELEMENT Input (#PCDATA) >

<!-- Configuration for each measurement -->
<!-- Name: to be used as the heading in the table -->
<!-- Lumi: the luminosity of the measurement -->
<!-- LumiRelErr: the relative error known for the lumi -->
<!-- BinLow: the lowest bin number used for the measurement (inclusive) -->
<!-- BinHigh: the highest bin number used for the measurement (exclusive) -->
<!-- Mode: type of the measurement (a closed list of ...) -->
<!-- ExportOnly: if "True" skip fit, only export model -->
<!ELEMENT Measurement (POI,ParamSetting*,ConstraintTerm*) >
<!ATTLIST Measurement
Name CDATA #REQUIRED
Lumi CDATA #REQUIRED
LumiRelErr CDATA #REQUIRED
BinLow CDATA #IMPLIED
BinHigh CDATA #IMPLIED
Mode CDATA #IMPLIED
ExportOnly CDATA #IMPLIED
>

<!-- Specify what you are measuring. Corresponds to the name specified in the construction
of the model in the channel setup. Typically the NormFactor for xsec measurements -->
<!ELEMENT POI (#PCDATA) >

<!-- Specify what parameters are fixed, or have particular value -->
<!-- Val: set the value of the parameter -->
<!-- Const: set this parameter constant -->
<!ELEMENT ParamSetting (#PCDATA)>
<!ATTLIST ParamSetting
Val CDATA #IMPLIED
Const CDATA #IMPLIED
>

<!-- Specify an alternative shape to use for given constraint terms (Gaussian is used if this is not specified) -->
<!-- Type: can be Gamma or Uniform -->
<!-- RelativeUncertainty: relative uncertainty on the shape -->
<!ELEMENT ConstraintTerm (#PCDATA)>
<!ATTLIST ConstraintTerm
Type CDATA #REQUIRED
RelativeUncertainty CDATA #IMPLIED
>

<!-- Top element for channels. InputFile, HistoName and HistoPath
can be set at this level, in which case they will become the default for
all subsequent elements. Otherwise they can be set in individual
subelements -->
<!ELEMENT Channel (Data*,StatErrorConfig*,Sample+)>
<!-- InputFile: input file where the input histogram can be found (use abs path) -->
<!-- HistoPath: the path (within the root file) where the histogram can be found -->
<!-- HistoName: the name of the histogram to be used for this (and following, if not overridden) item -->
<!ATTLIST Channel
Name CDATA #REQUIRED
InputFile CDATA #IMPLIED
HistoPath CDATA #IMPLIED
HistoName CDATA #IMPLIED
>

<!-- Data to be fit. If you don't provide it, Asimov data will be created -->
<!-- InputFile: any item set here will override the configuration for the subelements.
For this element there are no subelements, so the setting will only have local effects -->
<!ELEMENT Data EMPTY>
<!ATTLIST Data
InputFile CDATA #IMPLIED
HistoPath CDATA #IMPLIED
HistoName CDATA #IMPLIED
>

<!ELEMENT StatErrorConfig EMPTY>
<!ATTLIST StatErrorConfig
RelErrorThreshold CDATA #IMPLIED
ConstraintType CDATA #IMPLIED
>


<!-- Sample elements are made up of systematic variations -->
<!ELEMENT Sample (StatError | HistoSys | OverallSys | ShapeSys | NormFactor | ShapeFactor)*>
<!ATTLIST Sample
Name CDATA #REQUIRED
InputFile CDATA #IMPLIED
HistoName CDATA #IMPLIED
HistoPath CDATA #IMPLIED
NormalizeByTheory CDATA #IMPLIED
>

<!-- Systematics for which the variation is provided by histograms -->
<!ELEMENT StatError EMPTY>
<!ATTLIST StatError
Activate CDATA #REQUIRED
HistoName CDATA #IMPLIED
InputFile CDATA #IMPLIED
HistoPath CDATA #IMPLIED
>

<!ELEMENT HistoSys EMPTY>
<!ATTLIST HistoSys
Name CDATA #REQUIRED
InputFile CDATA #IMPLIED
HistoFileHigh CDATA #IMPLIED
HistoPathHigh CDATA #IMPLIED
HistoNameHigh CDATA #IMPLIED
HistoFileLow CDATA #IMPLIED
HistoPathLow CDATA #IMPLIED
HistoNameLow CDATA #IMPLIED
>

<!-- Systematics for which the variation is provided by simple overall scaling -->
<!ELEMENT OverallSys EMPTY>
<!ATTLIST OverallSys
Name CDATA #REQUIRED
High CDATA #REQUIRED
Low CDATA #REQUIRED
>

<!-- Systematics for which the per-bin relative uncertainty is provided by a histogram -->
<!ELEMENT ShapeSys EMPTY>
<!ATTLIST ShapeSys
Name CDATA #REQUIRED
HistoName CDATA #REQUIRED
HistoPath CDATA #IMPLIED
InputFile CDATA #IMPLIED
ConstraintType CDATA #IMPLIED
>

<!-- Scaling factor, which may be the parameter of interest for cross section measurements-->
<!ELEMENT NormFactor EMPTY>
<!ATTLIST NormFactor
Name CDATA #REQUIRED
Val CDATA #REQUIRED
High CDATA #REQUIRED
Low CDATA #REQUIRED
Const CDATA #IMPLIED
>


<!-- Systematics for which the variation is provided by simple overall scaling -->
<!ELEMENT ShapeFactor EMPTY>
<!ATTLIST ShapeFactor
Name CDATA #REQUIRED
>

28 changes: 28 additions & 0 deletions validation/xmlimport_input3/config/examples/example_ShapeSys.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<!--
//============================================================================
// Name : example.xml
//============================================================================
-->

<!--
Top-level configuration, details for the example channel are in example_ShapeSys_channel.xml.
This is the input file to the executable.
Note: Config.dtd needs to be accessible. It can be found in ROOT release area.
The file system path is relative to location of this XML file, not the executable.
-->

<!DOCTYPE Combination SYSTEM 'HistFactorySchema.dtd'>

<Combination OutputFilePrefix="./results/example_ShapeSys" >

<Input>./config/examples/example_ShapeSys_channel.xml</Input>


<Measurement Name="GaussExample" Lumi="1." LumiRelErr="0.1" >
<POI>SigXsecOverSM</POI>
<ParamSetting Const="True">Lumi alpha_syst1</ParamSetting>
<!-- don't need <ConstraintTerm> default is Gaussian-->
</Measurement>

</Combination>
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<!--
Single channel configuration example.
The top level configuration XML is example_ShapeSys.xml
NormalizeByTheory should be "True" (not "TRUE" or "true") for all non-data-driven backgrounds.
If you comment or remove the <Data> tag then it will use the expected data.
Histogram inputs should be in pb and in top-level xml the lumi should be in 1/pb
(The important thing is that they match... fb and 1/fb is also ok)
Note: Config.dtd needs to be accessible. It can be found in ROOT release area.
The file system path is relative to location of this XML file, not the executable.
-->

<!DOCTYPE Channel SYSTEM 'HistFactorySchema.dtd'>

<Channel Name="channel1" InputFile="./data/ShapeSys.root" >

<Data HistoName="data" HistoPath="" />

<StatErrorConfig RelErrorThreshold="0.05" ConstraintType="Poisson" />

<Sample Name="signal" HistoPath="" HistoName="signal">
<OverallSys Name="syst1" High="1.05" Low="0.95"/>
<NormFactor Name="SigXsecOverSM" Val="1" Low="0." High="3." />
</Sample>

<Sample Name="background1" HistoPath="" NormalizeByTheory="True" HistoName="background1" >
<ShapeSys Name="bkg1Shape" HistoName="bkg1ShapeError" />
</Sample>

<Sample Name="background2" HistoPath="" NormalizeByTheory="True" HistoName="background2">
<ShapeSys Name="bkg2Shape" HistoName="bkg2ShapeError" />
</Sample>

</Channel>
Binary file added validation/xmlimport_input3/data/ShapeSys.root
Binary file not shown.

0 comments on commit 4fa45dd

Please sign in to comment.