Skip to content

Commit

Permalink
Merge pull request #1 from efajardo-nv/dfp2-pipelines-exp2
Browse files — browse the repository at this point in the history
Explainability v2 update
  • Loading branch information
efajardo-nv authored Aug 18, 2022
2 parents b04a2fa + f61d00e commit 0309dcd
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 16 deletions.
26 changes: 12 additions & 14 deletions morpheus/stages/inference/auto_encoder_inference_stage.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -95,21 +95,19 @@ def process(self, batch: MultiInferenceAEMessage, cb: typing.Callable[[ResponseM
"""
data = batch.get_meta(batch.meta.df.columns.intersection(self._feature_columns))

explain_df = pd.DataFrame(np.empty((batch.count, 3), dtype=object),
columns=["num_col_max_loss", "bin_col_max_loss", "cat_col_max_loss"])
if batch.model is not None:
mse_loss, bce_loss, cce_loss, rloss_scores = batch.model.get_anomaly_score(data)
num_names, cat_names, bin_names = batch.model.return_feature_names()
vi_df = batch.model.get_variable_importance(num_names,
cat_names,
bin_names,
mse_loss,
bce_loss,
cce_loss,
data)
for col in vi_df.columns:
explain_df[col] = vi_df[col]
explain_cols = [x + "_z_loss" for x in self._feature_columns] + ["max_abs_z", "mean_abs_z"]
explain_df = pd.DataFrame(np.empty((batch.count, (len(self._feature_columns)+2)), dtype=object), columns=explain_cols)

if batch.model is not None:
rloss_scores = batch.model.get_anomaly_score(data)[3]

results = batch.model.get_results(data, return_abs=True)
scaled_z_scores = [col for col in results.columns if col.endswith('_z_loss')]
scaled_z_scores.extend(['max_abs_z', 'mean_abs_z'])
scaledz_df = results[scaled_z_scores]
for col in scaledz_df.columns:
explain_df[col] = scaledz_df[col]

zscores = (rloss_scores - batch.train_scores_mean) / batch.train_scores_std
rloss_scores = rloss_scores.reshape((batch.count, 1))
zscores = np.absolute(zscores)
Expand Down
4 changes: 2 additions & 2 deletions morpheus/stages/preprocess/train_ae_stage.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -105,7 +105,6 @@ def train(self, df: pd.DataFrame) -> AutoEncoder:
progress_bar=False)

logger.debug("Training AE model for user: '%s'...", self._user_id)
# train_df = combined_df[combined_df.columns.intersection(self._feature_columns)]
model.fit(train_df, epochs=self._epochs)
train_loss_scores = model.get_anomaly_score(train_df)[3]
scores_mean = train_loss_scores.mean()
Expand Down Expand Up @@ -274,7 +273,7 @@ def _build_single(self, builder: srf.Builder, input_stream: StreamPair) -> Strea

# Derive features here
df = self._source_stage_class.derive_features(df, self._feature_columns)
df = df.dropna(thresh=2, axis=1, how='all')
df = df.fillna("nan")
self._user_models[user_id].train(df)

if self._use_generic_model:
Expand All @@ -287,6 +286,7 @@ def _build_single(self, builder: srf.Builder, input_stream: StreamPair) -> Strea

all_users_df = pd.concat(user_to_df.values())
all_users_df = self._source_stage_class.derive_features(all_users_df, self._feature_columns)
all_users_df = all_users_df.fillna("nan")
self._user_models["generic"].train(all_users_df)

# Save trained user models
Expand Down

0 comments on commit 0309dcd

Please sign in to comment.