Skip to content

Commit

Permalink
Fix probability calculation for uBoostClassifier
Browse files (browse the repository at this point in the history)
- Rename uBoostBDT._uboost_predict_score to _uboost_predict_proba and
  uBoostBDT._uboost_staged_predict_score to _uboost_staged_predict_proba
  to avoid confusion.
- In uBoostClassifier.predict_proba and staged_predict_proba, average
  the results of individual uBoostBDTs instead of applying a sigmoid.
  • Loading branch information
Kerim Guseinov authored and arogozhnikov committed Oct 15, 2024
1 parent 442a321 commit 9cafaad
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions hep_ml/uboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,12 +360,12 @@ def staged_predict_proba(self, X):
for score in self.staged_decision_function(X):
yield commonutils.score_to_proba(score)

def _uboost_predict_score(self, X):
def _uboost_predict_proba(self, X):
"""Method added specially for uBoostClassifier"""
return sigmoid_function(self.decision_function(X) - self.score_cut,
self.smoothing)

def _uboost_staged_predict_score(self, X):
def _uboost_staged_predict_proba(self, X):
"""Method added specially for uBoostClassifier"""
for cut, score in zip(self.score_cuts_, self.staged_decision_function(X)):
yield sigmoid_function(score - cut, self.smoothing)
Expand Down Expand Up @@ -536,8 +536,9 @@ def predict_proba(self, X):
:return: array of shape [n_samples, n_classes] with probabilities.
"""
X = self._get_train_features(X)
score = sum(clf._uboost_predict_score(X) for clf in self.classifiers)
return commonutils.score_to_proba(score / self.efficiency_steps)
p = (sum(clf._uboost_predict_proba(X) for clf in self.classifiers)
/ self.efficiency_steps)
return np.array((1 - p, p)).T

def staged_predict_proba(self, X):
"""Predicted probabilities for each sample after each stage of boosting.
Expand All @@ -546,8 +547,9 @@ def staged_predict_proba(self, X):
:return: sequence of numpy.arrays of shape [n_samples, n_classes]
"""
X = self._get_train_features(X)
for scores in zip(*[clf._uboost_staged_predict_score(X) for clf in self.classifiers]):
yield commonutils.score_to_proba(sum(scores) / self.efficiency_steps)
for scores in zip(*[clf._uboost_staged_predict_proba(X) for clf in self.classifiers]):
p = sum(scores) / self.efficiency_steps
yield np.array((1 - p, p)).T


def _generate_subsample_mask(n_samples, subsample, random_generator):
Expand All @@ -560,4 +562,4 @@ def _generate_subsample_mask(n_samples, subsample, random_generator):
mask = slice(None, None, None)
else:
mask = random_generator.uniform(size=n_samples) < subsample
return mask
return mask

0 comments on commit 9cafaad

Please sign in to comment.