From b27175ec1aee93ed0f32deb0715e5397d6d8842e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= <carlossmocholi@gmail.com>
Date: Mon, 30 Jan 2023 19:05:51 +0100
Subject: [PATCH] Update tests to use on_*_epoch_end hooks instead of *_epoch_end

---
 tests/tests_pytorch/accelerators/test_ipu.py  |  9 ---
 tests/tests_pytorch/accelerators/test_tpu.py  |  1 -
 .../progress/test_tqdm_progress_bar.py        |  2 -
 .../callbacks/test_callback_hook_outputs.py   | 22 ------
 .../callbacks/test_early_stopping.py          | 12 +--
 .../callbacks/test_lr_monitor.py              |  3 -
 .../test_checkpoint_callback_frequency.py     |  2 +-
 .../checkpointing/test_model_checkpoint.py    | 14 +---
 .../checkpointing/test_trainer_checkpoint.py  |  1 -
 tests/tests_pytorch/core/test_datamodules.py  |  5 --
 .../core/test_lightning_module.py             |  2 -
 .../core/test_lightning_optimizer.py          |  7 --
 .../helpers/deterministic_model.py            | 14 ----
 tests/tests_pytorch/loggers/test_all.py       | 10 +--
 tests/tests_pytorch/loggers/test_logger.py    |  4 +-
 tests/tests_pytorch/loggers/test_neptune.py   |  2 +-
 .../tests_pytorch/loggers/test_tensorboard.py |  1 -
 .../loops/epoch/test_training_epoch_loop.py   | 52 ------------
 .../loops/optimization/test_optimizer_loop.py |  4 -
 .../loops/test_evaluation_loop.py             | 30 -------
 .../loops/test_evaluation_loop_flow.py        | 26 ------
 .../tests_pytorch/loops/test_flow_warnings.py |  1 -
 tests/tests_pytorch/loops/test_loops.py       |  3 -
 .../tests_pytorch/loops/test_training_loop.py |  5 --
 .../loops/test_training_loop_flow_dict.py     | 30 -------
 .../loops/test_training_loop_flow_scalar.py   | 36 +--------
 tests/tests_pytorch/models/test_hooks.py      | 54 +------------
 .../plugins/test_double_plugin.py             |  3 +-
 .../strategies/test_deepspeed_strategy.py     |  1 -
 tests/tests_pytorch/strategies/test_dp.py     | 27 ++++---
 .../trainer/connectors/test_data_connector.py |  1 -
 .../test_multiple_eval_dataloaders.py         | 46 +----------
 .../trainer/flags/test_fast_dev_run.py        | 18 ++---
 .../trainer/flags/test_min_max_epochs.py      |  1 -
 .../logging_/test_distributed_logging.py      |  1 -
 .../logging_/test_eval_loop_logging.py        | 60 +++++++-------
 .../trainer/logging_/test_logger_connector.py | 41 +---------
 .../trainer/logging_/test_loop_logging.py     |  3 -
 .../logging_/test_train_loop_logging.py       | 27 +++----
 .../optimization/test_manual_optimization.py  | 79 ++-----------------
 .../optimization/test_multiple_optimizers.py  | 15 ----
 .../trainer/optimization/test_optimizers.py   |  7 --
 .../trainer/test_config_validator.py          | 31 ++++++++
 .../tests_pytorch/trainer/test_dataloaders.py | 27 +------
 tests/tests_pytorch/trainer/test_trainer.py   | 22 ------
 .../tuner/test_scale_batch_size.py            |  1 -
 .../utilities/test_all_gather_grad.py         | 17 ++--
 .../utilities/test_auto_restart.py            |  6 +-
 .../tests_pytorch/utilities/test_fetching.py  |  4 +-
 49 files changed, 149 insertions(+), 641 deletions(-)

diff --git a/tests/tests_pytorch/accelerators/test_ipu.py b/tests/tests_pytorch/accelerators/test_ipu.py
index 08853b5d7ae26..3a7ed44923537 100644
--- a/tests/tests_pytorch/accelerators/test_ipu.py
+++ b/tests/tests_pytorch/accelerators/test_ipu.py
@@ -47,15 +47,6 @@ def validation_step(self, batch, batch_idx):
     def test_step(self, batch, batch_idx):
         return self.step(batch)
 
-    def training_epoch_end(self, outputs) -> None:
-        pass
-
-    def validation_epoch_end(self, outputs) -> None:
-        pass
-
-    def test_epoch_end(self, outputs) -> None:
-        pass
-
 
 class IPUClassificationModel(ClassificationModel):
     def training_step(self, batch, batch_idx):
diff --git a/tests/tests_pytorch/accelerators/test_tpu.py b/tests/tests_pytorch/accelerators/test_tpu.py
index 8a2d4a5137e2e..136a477557d59 100644
--- a/tests/tests_pytorch/accelerators/test_tpu.py
+++ b/tests/tests_pytorch/accelerators/test_tpu.py
@@ -155,7 +155,6 @@ def on_train_end(self):
     model = ManualOptimizationModel()
     model_copy = deepcopy(model)
     model.training_step_end = None
-    model.training_epoch_end = None
 
     trainer = Trainer(
         max_epochs=1,
diff --git a/tests/tests_pytorch/callbacks/progress/test_tqdm_progress_bar.py b/tests/tests_pytorch/callbacks/progress/test_tqdm_progress_bar.py
index 79b9e71c370a3..d1f0887a5ab24 100644
--- a/tests/tests_pytorch/callbacks/progress/test_tqdm_progress_bar.py
+++ b/tests/tests_pytorch/callbacks/progress/test_tqdm_progress_bar.py
@@ -136,8 +136,6 @@ def predict_step(self, batch, batch_idx, dataloader_idx=None):
             return
 
     model = CustomModel()
-    model.validation_epoch_end = None
-    model.test_epoch_end = None
 
     # check the sanity dataloaders
     num_sanity_val_steps = 4
diff --git a/tests/tests_pytorch/callbacks/test_callback_hook_outputs.py b/tests/tests_pytorch/callbacks/test_callback_hook_outputs.py
index dd70874f96fdc..25b2011e80697 100644
--- a/tests/tests_pytorch/callbacks/test_callback_hook_outputs.py
+++ b/tests/tests_pytorch/callbacks/test_callback_hook_outputs.py
@@ -41,9 +41,6 @@ def on_validation_batch_end(self, outputs, batch, batch_idx: int, dataloader_idx
         def on_test_batch_end(self, outputs, batch, batch_idx: int, dataloader_idx: int) -> None:
             assert "x" in outputs
 
-        def training_epoch_end(self, outputs) -> None:
-            assert len(outputs) == self.trainer.num_training_batches
-
     model = TestModel()
 
     trainer = Trainer(
@@ -59,22 +56,3 @@ def training_epoch_end(self, outputs) -> None:
     assert any(isinstance(c, CB) for c in trainer.callbacks)
 
     trainer.fit(model)
-
-
-def test_free_memory_on_eval_outputs(tmpdir):
-    class CB(Callback):
-        def on_train_epoch_end(self, trainer, pl_module):
-            assert not trainer._evaluation_loop._outputs
-
-    model = BoringModel()
-
-    trainer = Trainer(
-        callbacks=CB(),
-        default_root_dir=tmpdir,
-        limit_train_batches=2,
-        limit_val_batches=2,
-        max_epochs=1,
-        enable_model_summary=False,
-    )
-
-    trainer.fit(model)
diff --git a/tests/tests_pytorch/callbacks/test_early_stopping.py b/tests/tests_pytorch/callbacks/test_early_stopping.py
index 7663a53212427..db9a83aee3378 100644
--- a/tests/tests_pytorch/callbacks/test_early_stopping.py
+++ b/tests/tests_pytorch/callbacks/test_early_stopping.py
@@ -134,7 +134,7 @@ def test_early_stopping_patience(tmpdir, loss_values: list, patience: int, expec
     class ModelOverrideValidationReturn(BoringModel):
         validation_return_values = torch.tensor(loss_values)
 
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             loss = self.validation_return_values[self.current_epoch]
             self.log("test_val_loss", loss)
 
@@ -164,7 +164,7 @@ def test_early_stopping_patience_train(
     class ModelOverrideTrainReturn(BoringModel):
         train_return_values = torch.tensor(loss_values)
 
-        def training_epoch_end(self, outputs):
+        def on_train_epoch_end(self):
             loss = self.train_return_values[self.current_epoch]
             self.log("train_loss", loss)
 
@@ -226,7 +226,7 @@ def test_early_stopping_no_val_step(tmpdir):
 )
 def test_early_stopping_thresholds(tmpdir, stopping_threshold, divergence_threshold, losses, expected_epoch):
     class CurrentModel(BoringModel):
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             val_loss = losses[self.current_epoch]
             self.log("abc", val_loss)
 
@@ -252,7 +252,7 @@ def test_early_stopping_on_non_finite_monitor(tmpdir, stop_value):
     expected_stop_epoch = 2
 
     class CurrentModel(BoringModel):
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             val_loss = losses[self.current_epoch]
             self.log("val_loss", val_loss)
 
@@ -352,12 +352,12 @@ def _epoch_end(self) -> None:
         self.log("abc", torch.tensor(loss))
         self.log("cba", torch.tensor(0))
 
-    def training_epoch_end(self, outputs):
+    def on_train_epoch_end(self):
         if not self.early_stop_on_train:
             return
         self._epoch_end()
 
-    def validation_epoch_end(self, outputs):
+    def on_validation_epoch_end(self):
         if self.early_stop_on_train:
             return
         self._epoch_end()
diff --git a/tests/tests_pytorch/callbacks/test_lr_monitor.py b/tests/tests_pytorch/callbacks/test_lr_monitor.py
index 1a54dd15dec84..6262c86800274 100644
--- a/tests/tests_pytorch/callbacks/test_lr_monitor.py
+++ b/tests/tests_pytorch/callbacks/test_lr_monitor.py
@@ -248,7 +248,6 @@ def configure_optimizers(self):
             return [optimizer1, optimizer2], [lr_scheduler1, lr_scheduler2]
 
     model = CustomBoringModel()
-    model.training_epoch_end = None
 
     lr_monitor = LearningRateMonitor(logging_interval=logging_interval)
     log_every_n_steps = 2
@@ -306,7 +305,6 @@ def configure_optimizers(self):
             return [optimizer1, optimizer2]
 
     model = CustomBoringModel()
-    model.training_epoch_end = None
 
     lr_monitor = LearningRateMonitor(logging_interval=logging_interval)
     log_every_n_steps = 2
@@ -563,7 +561,6 @@ def finetune_function(self, pl_module, epoch: int, optimizer, opt_idx: int):
         enable_checkpointing=False,
     )
     model = TestModel()
-    model.training_epoch_end = None
     trainer.fit(model)
 
     expected = [0.1, 0.1, 0.1, 0.1, 0.1]
diff --git a/tests/tests_pytorch/checkpointing/test_checkpoint_callback_frequency.py b/tests/tests_pytorch/checkpointing/test_checkpoint_callback_frequency.py
index 8d582117591b0..b0d58d935603e 100644
--- a/tests/tests_pytorch/checkpointing/test_checkpoint_callback_frequency.py
+++ b/tests/tests_pytorch/checkpointing/test_checkpoint_callback_frequency.py
@@ -96,7 +96,7 @@ def training_step(self, batch, batch_idx):
             self.log("my_loss", batch_idx * (1 + local_rank), on_epoch=True)
             return super().training_step(batch, batch_idx)
 
-        def training_epoch_end(self, outputs) -> None:
+        def on_train_epoch_end(self):
             local_rank = int(os.getenv("LOCAL_RANK"))
             if self.trainer.is_global_zero:
                 self.log("my_loss_2", (1 + local_rank), on_epoch=True, rank_zero_only=True)
diff --git a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py
index 5f1bbc1ec8684..5f24e45362185 100644
--- a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py
+++ b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py
@@ -59,9 +59,8 @@ def training_step(self, batch, batch_idx):
         self.log("early_stop_on", out["loss"])
         return out
 
-    def validation_epoch_end(self, outputs):
-        outs = torch.stack([x["x"] for x in outputs]).mean()
-        self.log("val_acc", outs)
+    def on_validation_epoch_end(self):
+        self.log("val_acc", torch.tensor(1.23))
 
 
 def mock_training_epoch_loop(trainer):
@@ -214,9 +213,8 @@ def validation_step(self, batch, batch_idx):
             self.log("val_log", log_value)
             return super().validation_step(batch, batch_idx)
 
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             self.val_loop_count += 1
-            super().validation_epoch_end(outputs)
             self.scores.append(self.trainer.logged_metrics[monitor])
 
         def configure_optimizers(self):
@@ -829,7 +827,7 @@ def test_checkpointing_with_nan_as_first(tmpdir, mode):
     monitor += [5, 7, 8] if mode == "max" else [8, 7, 5]
 
     class CurrentModel(LogInTwoMethods):
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             val_loss = monitor[self.current_epoch]
             self.log("abc", val_loss)
 
@@ -863,7 +861,6 @@ def validation_step(self, batch, batch_idx):
             self.log("val_loss", loss)
 
     model = ExtendedBoringModel()
-    model.validation_epoch_end = None
     trainer_kwargs = {
         "max_epochs": 1,
         "limit_train_batches": 2,
@@ -901,9 +898,6 @@ def validation_step(self, batch, batch_idx):
             self.log("val_loss", loss)
             return {"val_loss": loss}
 
-        def validation_epoch_end(self, *_):
-            ...
-
     def assert_trainer_init(trainer):
         assert trainer.global_step == 0
         assert trainer.current_epoch == 0
diff --git a/tests/tests_pytorch/checkpointing/test_trainer_checkpoint.py b/tests/tests_pytorch/checkpointing/test_trainer_checkpoint.py
index b5d52835b4cb0..83d788c4185ef 100644
--- a/tests/tests_pytorch/checkpointing/test_trainer_checkpoint.py
+++ b/tests/tests_pytorch/checkpointing/test_trainer_checkpoint.py
@@ -41,7 +41,6 @@ def validation_step(self, batch, batch_idx):
             self.log("val_loss", loss, on_epoch=True, prog_bar=True)
 
     model = ExtendedBoringModel()
-    model.validation_epoch_end = None
     trainer = Trainer(
         default_root_dir=tmpdir,
         max_epochs=1,
diff --git a/tests/tests_pytorch/core/test_datamodules.py b/tests/tests_pytorch/core/test_datamodules.py
index 5ce1a452d6e3c..af2d51fc38bd2 100644
--- a/tests/tests_pytorch/core/test_datamodules.py
+++ b/tests/tests_pytorch/core/test_datamodules.py
@@ -163,10 +163,8 @@ def test_train_loop_only(tmpdir):
 
     model.validation_step = None
     model.validation_step_end = None
-    model.validation_epoch_end = None
     model.test_step = None
     model.test_step_end = None
-    model.test_epoch_end = None
 
     trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, enable_model_summary=False)
 
@@ -185,7 +183,6 @@ def test_train_val_loop_only(tmpdir):
 
     model.validation_step = None
     model.validation_step_end = None
-    model.validation_epoch_end = None
 
     trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, enable_model_summary=False)
 
@@ -278,10 +275,8 @@ def train_dataloader(self):
 
     model.validation_step = None
     model.validation_step_end = None
-    model.validation_epoch_end = None
     model.test_step = None
     model.test_step_end = None
-    model.test_epoch_end = None
 
     trainer = Trainer(default_root_dir=tmpdir, max_epochs=3, limit_train_batches=2, reload_dataloaders_every_n_epochs=2)
     trainer.fit(model, dm)
diff --git a/tests/tests_pytorch/core/test_lightning_module.py b/tests/tests_pytorch/core/test_lightning_module.py
index 401fce017e8ee..876a9cefb4eae 100644
--- a/tests/tests_pytorch/core/test_lightning_module.py
+++ b/tests/tests_pytorch/core/test_lightning_module.py
@@ -178,7 +178,6 @@ def configure_optimizers(self):
             return [optimizer_1, optimizer_2]
 
     model = TestModel()
-    model.training_epoch_end = None
 
     trainer = Trainer(max_epochs=1, default_root_dir=tmpdir, limit_train_batches=8, limit_val_batches=0)
     trainer.fit(model)
@@ -281,7 +280,6 @@ def configure_optimizers(self):
             return [optimizer_1, optimizer_2, optimizer_3]
 
     model = TestModel()
-    model.training_epoch_end = None
     trainer = Trainer(max_epochs=1, default_root_dir=tmpdir, limit_train_batches=8)
     trainer.fit(model)
 
diff --git a/tests/tests_pytorch/core/test_lightning_optimizer.py b/tests/tests_pytorch/core/test_lightning_optimizer.py
index 29f7cd60ad681..5ac6e10ae2585 100644
--- a/tests/tests_pytorch/core/test_lightning_optimizer.py
+++ b/tests/tests_pytorch/core/test_lightning_optimizer.py
@@ -109,7 +109,6 @@ def configure_optimizers(self):
 
     model = TestModel()
     model.training_step_end = None
-    model.training_epoch_end = None
     trainer = Trainer(
         default_root_dir=tmpdir, limit_train_batches=8, limit_val_batches=1, max_epochs=1, enable_model_summary=False
     )
@@ -166,9 +165,6 @@ def test_lightning_optimizer_automatic_optimization_optimizer_zero_grad(tmpdir):
     """Test overriding zero_grad works in automatic_optimization."""
 
     class TestModel(BoringModel):
-        def training_epoch_end(self, outputs):
-            ...
-
         def optimizer_zero_grad(self, epoch, batch_idx, optimizer, optimizer_idx):
             if batch_idx % 2 == 0:
                 optimizer.zero_grad()
@@ -195,9 +191,6 @@ class TestModel(BoringModel):
         def training_step(self, batch, batch_idx, optimizer_idx=None):
             return super().training_step(batch, batch_idx)
 
-        def training_epoch_end(self, outputs):
-            ...
-
         def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, optimizer_closure, **_):
             assert isinstance(optimizer_closure, Closure)
             # zero_grad is called inside the closure
diff --git a/tests/tests_pytorch/helpers/deterministic_model.py b/tests/tests_pytorch/helpers/deterministic_model.py
index fff8445f618dd..befc87cda065e 100644
--- a/tests/tests_pytorch/helpers/deterministic_model.py
+++ b/tests/tests_pytorch/helpers/deterministic_model.py
@@ -24,11 +24,9 @@ def __init__(self, weights=None):
 
         self.training_step_called = False
         self.training_step_end_called = False
-        self.training_epoch_end_called = False
 
         self.validation_step_called = False
         self.validation_step_end_called = False
-        self.validation_epoch_end_called = False
 
         self.assert_backward = True
 
@@ -74,18 +72,6 @@ def validation_step_end(self, val_step_output):
 
         return val_step_output
 
-    def validation_epoch_end(self, outputs):
-        assert len(outputs) == self.trainer.num_val_batches[0]
-
-        for i, out in enumerate(outputs):
-            assert out["log"]["log_acc1"] >= 12 + i
-
-        self.validation_epoch_end_called = True
-
-        result = outputs[-1]
-        result["val_epoch_end"] = torch.tensor(1233)
-        return result
-
     # -----------------------------
     # DATA
     # -----------------------------
diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py
index 78eb59be2cf19..beaffcb3d094b 100644
--- a/tests/tests_pytorch/loggers/test_all.py
+++ b/tests/tests_pytorch/loggers/test_all.py
@@ -97,13 +97,11 @@ def training_step(self, batch, batch_idx):
             self.log("train_some_val", loss)
             return {"loss": loss}
 
-        def validation_epoch_end(self, outputs) -> None:
-            avg_val_loss = torch.stack([x["x"] for x in outputs]).mean()
-            self.log_dict({"early_stop_on": avg_val_loss, "val_loss": avg_val_loss**0.5})
+        def on_validation_epoch_end(self):
+            self.log_dict({"early_stop_on": torch.tensor(1), "val_loss": torch.tensor(0.5)})
 
-        def test_epoch_end(self, outputs) -> None:
-            avg_test_loss = torch.stack([x["y"] for x in outputs]).mean()
-            self.log("test_loss", avg_test_loss)
+        def on_test_epoch_end(self):
+            self.log("test_loss", torch.tensor(2))
 
     class StoreHistoryLogger(logger_class):
         def __init__(self, *args, **kwargs) -> None:
diff --git a/tests/tests_pytorch/loggers/test_logger.py b/tests/tests_pytorch/loggers/test_logger.py
index f6c72814ed41a..bedc84948ccd3 100644
--- a/tests/tests_pytorch/loggers/test_logger.py
+++ b/tests/tests_pytorch/loggers/test_logger.py
@@ -147,11 +147,11 @@ def log_metrics(self, metrics, step):
             super().log_metrics(metrics, step)
 
     class CustomModel(BoringModel):
-        def training_epoch_end(self, outputs):
+        def on_train_epoch_end(self):
             self.logger.logged_step += 1
             self.log_dict({"step": self.logger.logged_step, "train_acc": self.logger.logged_step / 10})
 
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             self.logger.logged_step += 1
             self.log_dict({"step": self.logger.logged_step, "val_acc": self.logger.logged_step / 10})
 
diff --git a/tests/tests_pytorch/loggers/test_neptune.py b/tests/tests_pytorch/loggers/test_neptune.py
index daf33b3759e40..0c2f2344906cc 100644
--- a/tests/tests_pytorch/loggers/test_neptune.py
+++ b/tests/tests_pytorch/loggers/test_neptune.py
@@ -208,7 +208,7 @@ def test_neptune_log_metrics_on_trained_model(self, neptune):
         """Verify that trained models do log data."""
         # given
         class LoggingModel(BoringModel):
-            def validation_epoch_end(self, outputs):
+            def on_validation_epoch_end(self):
                 self.log("some/key", 42)
 
         # and
diff --git a/tests/tests_pytorch/loggers/test_tensorboard.py b/tests/tests_pytorch/loggers/test_tensorboard.py
index 87264216f2b48..f348d02f27ce8 100644
--- a/tests/tests_pytorch/loggers/test_tensorboard.py
+++ b/tests/tests_pytorch/loggers/test_tensorboard.py
@@ -258,7 +258,6 @@ def training_step(self, *args):
             return super().training_step(*args)
 
     model = TestModel()
-    model.training_epoch_end = None
     logger_0 = TensorBoardLogger(tmpdir, default_hp_metric=False)
     trainer = Trainer(
         default_root_dir=tmpdir,
diff --git a/tests/tests_pytorch/loops/epoch/test_training_epoch_loop.py b/tests/tests_pytorch/loops/epoch/test_training_epoch_loop.py
index 6fefe498e7554..ec2c77302bb23 100644
--- a/tests/tests_pytorch/loops/epoch/test_training_epoch_loop.py
+++ b/tests/tests_pytorch/loops/epoch/test_training_epoch_loop.py
@@ -41,14 +41,6 @@ def prepare_outputs(self, fn, batch_outputs, num_optimizers, automatic_optimizat
             num_optimizers=num_optimizers,  # does not matter for manual optimization
         )
 
-    def prepare_outputs_training_epoch_end(self, batch_outputs, num_optimizers, automatic_optimization=True):
-        return self.prepare_outputs(
-            _TrainingEpochLoop._prepare_outputs_training_epoch_end,
-            batch_outputs,
-            num_optimizers,
-            automatic_optimization=automatic_optimization,
-        )
-
     def prepare_outputs_training_batch_end(self, batch_outputs, num_optimizers, automatic_optimization=True):
         return self.prepare_outputs(
             _TrainingEpochLoop._prepare_outputs_training_batch_end,
@@ -57,50 +49,6 @@ def prepare_outputs_training_batch_end(self, batch_outputs, num_optimizers, auto
             automatic_optimization=automatic_optimization,
         )
 
-    @pytest.mark.parametrize(
-        "num_optimizers,batch_outputs,expected",
-        [
-            (1, [], []),
-            (1, [[]], []),
-            # 1 batch
-            (1, [[{0: _out00}]], [_out00]),
-            # 2 batches
-            (1, [[{0: _out00}], [{0: _out01}]], [_out00, _out01]),
-            # 1 batch, 2 optimizers
-            (2, [[{0: _out00, 1: _out01}]], [_out00, _out01]),
-            # 2 batches, 2 optimizers
-            (2, [[{0: _out00, 1: _out01}], [{0: _out10, 1: _out11}]], [[_out00, _out01], [_out10, _out11]]),
-            # 4 batches, 2 optimizers, different frequency
-            (
-                2,
-                [[{0: _out00}], [{1: _out10}], [{1: _out11}], [{0: _out01}]],
-                [[_out00], [_out10], [_out11], [_out01]],
-            ),
-        ],
-    )
-    def test_prepare_outputs_training_epoch_end_automatic(self, num_optimizers, batch_outputs, expected):
-        """Test that the loop converts the nested lists of outputs to the format that the `training_epoch_end` hook
-        currently expects in the case of automatic optimization."""
-        assert self.prepare_outputs_training_epoch_end(batch_outputs, num_optimizers) == expected
-
-    @pytest.mark.parametrize(
-        "batch_outputs,expected",
-        [
-            ([], []),
-            ([[]], []),
-            # 1 batch
-            ([[_out00]], [_out00]),
-            # 2 batches
-            ([[_out00], [_out01]], [_out00, _out01]),
-            # skipped outputs
-            ([[_out00], [], [], [_out03]], [_out00, _out03]),
-        ],
-    )
-    def test_prepare_outputs_training_epoch_end_manual(self, batch_outputs, expected):
-        """Test that the loop converts the nested lists of outputs to the format that the `training_epoch_end` hook
-        currently expects in the case of manual optimization."""
-        assert self.prepare_outputs_training_epoch_end(batch_outputs, -1, automatic_optimization=False) == expected
-
     @pytest.mark.parametrize(
         "num_optimizers,batch_end_outputs,expected",
         [
diff --git a/tests/tests_pytorch/loops/optimization/test_optimizer_loop.py b/tests/tests_pytorch/loops/optimization/test_optimizer_loop.py
index 27826d5fd88eb..4d91af6dd99da 100644
--- a/tests/tests_pytorch/loops/optimization/test_optimizer_loop.py
+++ b/tests/tests_pytorch/loops/optimization/test_optimizer_loop.py
@@ -114,7 +114,6 @@ def configure_optimizers(self):
             return {"optimizer": opt0, "frequency": frequencies[0]}, {"optimizer": opt1, "frequency": frequencies[1]}
 
     model = CurrentModel()
-    model.training_epoch_end = None
     model.optimizer_step = Mock(wraps=model.optimizer_step)
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -178,7 +177,6 @@ def configure_optimizers(self):
     fail = False
     seed_everything(0)
     model = MultipleOptimizerModel()
-    model.training_epoch_end = None
     model.optimizer_step = Mock(wraps=model.optimizer_step)
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -197,7 +195,6 @@ def configure_optimizers(self):
     fail = True
     seed_everything(0)
     model = MultipleOptimizerModel()
-    model.training_epoch_end = None
     model.optimizer_step = Mock(wraps=model.optimizer_step)
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -217,7 +214,6 @@ def configure_optimizers(self):
     fail = False
     seed_everything(0)
     model = MultipleOptimizerModel()
-    model.training_epoch_end = None
     model.optimizer_step = Mock(wraps=model.optimizer_step)
     trainer = Trainer(
         default_root_dir=tmpdir,
diff --git a/tests/tests_pytorch/loops/test_evaluation_loop.py b/tests/tests_pytorch/loops/test_evaluation_loop.py
index 8db9f2173f08c..0f3164e8e6d2b 100644
--- a/tests/tests_pytorch/loops/test_evaluation_loop.py
+++ b/tests/tests_pytorch/loops/test_evaluation_loop.py
@@ -20,7 +20,6 @@
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset
-from pytorch_lightning.utilities.model_helpers import is_overridden
 from tests_pytorch.helpers.runif import RunIf
 
 
@@ -179,32 +178,3 @@ def validation_step(self, batch, batch_idx):
         enable_model_summary=False,
     )
     trainer.fit(BoringLargeBatchModel())
-
-
-def test_evaluation_loop_doesnt_store_outputs_if_epoch_end_not_overridden(tmpdir):
-    did_assert = False
-
-    class TestModel(BoringModel):
-        def on_test_batch_end(self, outputs, *_):
-            # check `test_step` returns something
-            assert outputs is not None
-
-    model = TestModel()
-    model.test_epoch_end = None
-    assert not is_overridden("test_epoch_end", model)
-
-    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=3)
-    loop = trainer.test_loop.epoch_loop
-    original_advance = loop.advance
-
-    def assert_on_advance_end(*args, **kwargs):
-        original_advance(*args, **kwargs)
-        # should be empty
-        assert not loop._outputs
-        # sanity check
-        nonlocal did_assert
-        did_assert = True
-
-    loop.advance = assert_on_advance_end
-    trainer.test(model)
-    assert did_assert
diff --git a/tests/tests_pytorch/loops/test_evaluation_loop_flow.py b/tests/tests_pytorch/loops/test_evaluation_loop_flow.py
index d244d6e08a78f..560acccfb16fe 100644
--- a/tests/tests_pytorch/loops/test_evaluation_loop_flow.py
+++ b/tests/tests_pytorch/loops/test_evaluation_loop_flow.py
@@ -45,7 +45,6 @@ def backward(self, loss, optimizer, optimizer_idx):
 
     model = TestModel()
     model.validation_step_end = None
-    model.validation_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -60,7 +59,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.validation_step_called
     assert not model.validation_step_end_called
-    assert not model.validation_epoch_end_called
 
     # simulate training manually
     trainer.state.stage = RunningStage.TRAINING
@@ -106,7 +104,6 @@ def backward(self, loss, optimizer, optimizer_idx):
             return LightningModule.backward(self, loss, optimizer, optimizer_idx)
 
     model = TestModel()
-    model.validation_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -121,7 +118,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.validation_step_called
     assert model.validation_step_end_called
-    assert not model.validation_epoch_end_called
 
     trainer.state.stage = RunningStage.TRAINING
     # make sure training outputs what is expected
@@ -159,16 +155,6 @@ def validation_step(self, batch, batch_idx):
                 self.out_b = out
             return out
 
-        def validation_epoch_end(self, outputs):
-            self.validation_epoch_end_called = True
-            assert len(outputs) == 2
-
-            out_a = outputs[0]
-            out_b = outputs[1]
-
-            assert out_a == self.out_a
-            assert out_b == self.out_b
-
         def backward(self, loss, optimizer, optimizer_idx):
             return LightningModule.backward(self, loss, optimizer, optimizer_idx)
 
@@ -189,7 +175,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.validation_step_called
     assert not model.validation_step_end_called
-    assert model.validation_epoch_end_called
 
 
 def test__validation_step__step_end__epoch_end__flow(tmpdir):
@@ -218,16 +203,6 @@ def validation_step_end(self, out):
             assert self.last_out == out
             return out
 
-        def validation_epoch_end(self, outputs):
-            self.validation_epoch_end_called = True
-            assert len(outputs) == 2
-
-            out_a = outputs[0]
-            out_b = outputs[1]
-
-            assert out_a == self.out_a
-            assert out_b == self.out_b
-
         def backward(self, loss, optimizer, optimizer_idx):
             return LightningModule.backward(self, loss, optimizer, optimizer_idx)
 
@@ -247,4 +222,3 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.validation_step_called
     assert model.validation_step_end_called
-    assert model.validation_epoch_end_called
diff --git a/tests/tests_pytorch/loops/test_flow_warnings.py b/tests/tests_pytorch/loops/test_flow_warnings.py
index 981c2d4b95ef0..26d5aa1ad7802 100644
--- a/tests/tests_pytorch/loops/test_flow_warnings.py
+++ b/tests/tests_pytorch/loops/test_flow_warnings.py
@@ -27,7 +27,6 @@ def test_no_depre_without_epoch_end(tmpdir):
     """Tests that only training_step can be used."""
 
     model = TestModel()
-    model.validation_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
diff --git a/tests/tests_pytorch/loops/test_loops.py b/tests/tests_pytorch/loops/test_loops.py
index 0944c42fe2be1..43412ce651303 100644
--- a/tests/tests_pytorch/loops/test_loops.py
+++ b/tests/tests_pytorch/loops/test_loops.py
@@ -216,7 +216,6 @@ def val_dataloader(self):
             return [super(ValidationModel, self).val_dataloader() for _ in range(n_dataloaders)]
 
     model = ValidationModel()
-    model.validation_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -297,7 +296,6 @@ def configure_optimizers_multiple(self):
             return optimizers, lr_schedulers
 
     model = TestModel()
-    model.training_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -494,7 +492,6 @@ def train_dataloader(self):
             return DataLoader(RandomDataset(32, n_batches))
 
     model = TestModel()
-    model.training_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
diff --git a/tests/tests_pytorch/loops/test_training_loop.py b/tests/tests_pytorch/loops/test_training_loop.py
index e989d6bec3340..0f921028bbd6d 100644
--- a/tests/tests_pytorch/loops/test_training_loop.py
+++ b/tests/tests_pytorch/loops/test_training_loop.py
@@ -42,11 +42,6 @@ def on_train_batch_end(self, outputs, batch, batch_idx):
             HookedModel._check_output(outputs)
             super().on_train_batch_end(outputs, batch, batch_idx)
 
-        def training_epoch_end(self, outputs):
-            assert len(outputs) == 2
-            [HookedModel._check_output(output) for output in outputs]
-            super().training_epoch_end(outputs)
-
     model = HookedModel()
 
     # fit model
diff --git a/tests/tests_pytorch/loops/test_training_loop_flow_dict.py b/tests/tests_pytorch/loops/test_training_loop_flow_dict.py
index e82519ad6021f..230e3f8cc73e1 100644
--- a/tests/tests_pytorch/loops/test_training_loop_flow_dict.py
+++ b/tests/tests_pytorch/loops/test_training_loop_flow_dict.py
@@ -49,7 +49,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.training_step_called
     assert not model.training_step_end_called
-    assert not model.training_epoch_end_called
 
 
 def test__training_step__tr_step_end__flow_dict(tmpdir):
@@ -88,7 +87,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.training_step_called
     assert model.training_step_end_called
-    assert not model.training_epoch_end_called
 
 
 def test__training_step__epoch_end__flow_dict(tmpdir):
@@ -103,19 +101,6 @@ def training_step(self, batch, batch_idx):
             out = {"loss": acc, "random_things": [1, "a", torch.tensor(2)], "batch_idx": batch_idx}
             return out
 
-        def training_epoch_end(self, outputs):
-            self.training_epoch_end_called = True
-
-            # verify we saw the current num of batches
-            assert len(outputs) == 2
-            assert len({id(output) for output in outputs}) == 2
-            assert [output["batch_idx"] for output in outputs] == [0, 1]
-
-            for b in outputs:
-                assert isinstance(b, dict)
-                assert self.count_num_graphs(b) == 0
-                assert {"random_things", "loss", "batch_idx"} == set(b.keys())
-
         def backward(self, loss, optimizer, optimizer_idx):
             return LightningModule.backward(self, loss, optimizer, optimizer_idx)
 
@@ -135,7 +120,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.training_step_called
     assert not model.training_step_end_called
-    assert model.training_epoch_end_called
 
 
 def test__training_step__step_end__epoch_end__flow_dict(tmpdir):
@@ -156,19 +140,6 @@ def training_step_end(self, tr_step_output):
             self.training_step_end_called = True
             return tr_step_output
 
-        def training_epoch_end(self, outputs):
-            self.training_epoch_end_called = True
-
-            # verify we saw the current num of batches
-            assert len(outputs) == 2
-            assert len({id(output) for output in outputs}) == 2
-            assert [output["batch_idx"] for output in outputs] == [0, 1]
-
-            for b in outputs:
-                assert isinstance(b, dict)
-                assert self.count_num_graphs(b) == 0
-                assert {"random_things", "loss", "batch_idx"} == set(b.keys())
-
         def backward(self, loss, optimizer, optimizer_idx):
             return LightningModule.backward(self, loss, optimizer, optimizer_idx)
 
@@ -188,4 +159,3 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.training_step_called
     assert model.training_step_end_called
-    assert model.training_epoch_end_called
diff --git a/tests/tests_pytorch/loops/test_training_loop_flow_scalar.py b/tests/tests_pytorch/loops/test_training_loop_flow_scalar.py
index d9dd5fc341d47..0a91af35b1267 100644
--- a/tests/tests_pytorch/loops/test_training_loop_flow_scalar.py
+++ b/tests/tests_pytorch/loops/test_training_loop_flow_scalar.py
@@ -54,7 +54,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.training_step_called
     assert not model.training_step_end_called
-    assert not model.training_epoch_end_called
 
 
 def test__training_step__tr_step_end__flow_scalar(tmpdir):
@@ -93,7 +92,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.training_step_called
     assert model.training_step_end_called
-    assert not model.training_epoch_end_called
 
 
 def test__training_step__epoch_end__flow_scalar(tmpdir):
@@ -107,18 +105,6 @@ def training_step(self, batch, batch_idx):
             self.training_step_called = True
             return acc
 
-        def training_epoch_end(self, outputs):
-            self.training_epoch_end_called = True
-
-            # verify we saw the current num of batches
-            assert len(outputs) == 2
-
-            for b in outputs:
-                # time = 1
-                assert len(b) == 1
-                assert "loss" in b
-                assert isinstance(b, dict)
-
         def backward(self, loss, optimizer, optimizer_idx):
             return LightningModule.backward(self, loss, optimizer, optimizer_idx)
 
@@ -138,7 +124,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.training_step_called
     assert not model.training_step_end_called
-    assert model.training_epoch_end_called
 
     # assert epoch end metrics were added
     assert len(trainer.callback_metrics) == 0
@@ -161,7 +146,7 @@ def backward(self, loss, optimizer, optimizer_idx):
 
 
 def test__training_step__step_end__epoch_end__flow_scalar(tmpdir):
-    """Checks train_step + training_step_end + training_epoch_end (all with scalar return from train_step)."""
+    """Checks train_step + training_step_end (all with scalar return from train_step)."""
 
     class TestModel(DeterministicModel):
         def training_step(self, batch, batch_idx):
@@ -177,18 +162,6 @@ def training_step_end(self, tr_step_output):
             self.training_step_end_called = True
             return tr_step_output
 
-        def training_epoch_end(self, outputs):
-            self.training_epoch_end_called = True
-
-            # verify we saw the current num of batches
-            assert len(outputs) == 2
-
-            for b in outputs:
-                # time = 1
-                assert len(b) == 1
-                assert "loss" in b
-                assert isinstance(b, dict)
-
         def backward(self, loss, optimizer, optimizer_idx):
             return LightningModule.backward(self, loss, optimizer, optimizer_idx)
 
@@ -208,7 +181,6 @@ def backward(self, loss, optimizer, optimizer_idx):
     # make sure correct steps were called
     assert model.training_step_called
     assert model.training_step_end_called
-    assert model.training_epoch_end_called
 
     # assert epoch end metrics were added
     assert len(trainer.callback_metrics) == 0
@@ -240,15 +212,9 @@ def training_step(self, batch):
             loss = self.step(batch[0])
             self.log("a", loss, on_step=True, on_epoch=True)
 
-        def training_epoch_end(self, outputs) -> None:
-            assert len(outputs) == 0, outputs
-
         def validation_step(self, batch, batch_idx):
             self.validation_step_called = True
 
-        def validation_epoch_end(self, outputs):
-            assert len(outputs) == 0, outputs
-
     model = TestModel()
     trainer_args = dict(default_root_dir=tmpdir, fast_dev_run=2)
     trainer = Trainer(**trainer_args)
diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py
index 16b2a04a95817..6d8e7ce9e347c 100644
--- a/tests/tests_pytorch/models/test_hooks.py
+++ b/tests/tests_pytorch/models/test_hooks.py
@@ -67,7 +67,7 @@ def on_before_zero_grad(self, optimizer):
     assert 0 == model.on_before_zero_grad_called
 
 
-def test_training_epoch_end_metrics_collection(tmpdir):
+def test_on_train_epoch_end_metrics_collection(tmpdir):
     """Test that progress bar metrics also get collected at the end of an epoch."""
     num_epochs = 3
 
@@ -77,7 +77,7 @@ def training_step(self, *args, **kwargs):
             self.log_dict({"step_metric": torch.tensor(-1), "shared_metric": 100}, logger=False, prog_bar=True)
             return output
 
-        def training_epoch_end(self, outputs):
+        def on_train_epoch_end(self):
             epoch = self.current_epoch
             # both scalar tensors and Python numbers are accepted
             self.log_dict(
@@ -99,40 +99,6 @@ def training_epoch_end(self, outputs):
         assert metrics[f"epoch_metric_{i}"] == i
 
 
-def test_training_epoch_end_metrics_collection_on_override(tmpdir):
-    """Test that batch end metrics are collected when training_epoch_end is overridden at the end of an epoch."""
-
-    class OverriddenModel(BoringModel):
-        def __init__(self):
-            super().__init__()
-            self.len_outputs = 0
-
-        def on_train_epoch_start(self):
-            self.num_train_batches = 0
-
-        def training_epoch_end(self, outputs):
-            self.len_outputs = len(outputs)
-
-        def on_train_batch_end(self, outputs, batch, batch_idx):
-            self.num_train_batches += 1
-
-    class NotOverriddenModel(BoringModel):
-        def on_train_epoch_start(self):
-            self.num_train_batches = 0
-
-        def on_train_batch_end(self, outputs, batch, batch_idx):
-            self.num_train_batches += 1
-
-    overridden_model = OverriddenModel()
-    not_overridden_model = NotOverriddenModel()
-    not_overridden_model.training_epoch_end = None
-
-    trainer = Trainer(max_epochs=1, default_root_dir=tmpdir, overfit_batches=2)
-
-    trainer.fit(overridden_model)
-    assert overridden_model.len_outputs == overridden_model.num_train_batches
-
-
 @pytest.mark.parametrize(
     "accelerator,expected_device_str",
     [
@@ -214,7 +180,6 @@ def train_dataloader(self):
 
     model = TestModel()
     model.validation_step = None
-    model.training_epoch_end = None
     trainer = Trainer(
         default_root_dir=tmpdir,
         limit_train_batches=2,
@@ -286,14 +251,6 @@ def call(hook, fn, *args, **kwargs):
             update_wrapper(partial_h, attr)
             setattr(self, h, partial_h)
 
-    def validation_epoch_end(self, *args, **kwargs):
-        # `BoringModel` does not have a return for `validation_step_end` so this would fail
-        pass
-
-    def test_epoch_end(self, *args, **kwargs):
-        # `BoringModel` does not have a return for `test_step_end` so this would fail
-        pass
-
     def _train_batch(self, *args, **kwargs):
         if self.automatic_optimization:
             return self._auto_train_batch(*args, **kwargs)
@@ -391,12 +348,10 @@ def _manual_train_batch(trainer, model, batches, device=torch.device("cpu"), **k
 
     @staticmethod
     def _eval_epoch(fn, trainer, model, batches, key, device=torch.device("cpu")):
-        outputs = {key: ANY}
         return [
             dict(name=f"Callback.on_{fn}_epoch_start", args=(trainer, model)),
             dict(name=f"on_{fn}_epoch_start"),
             *HookedModel._eval_batch(fn, trainer, model, batches, key, device=device),
-            dict(name=f"{fn}_epoch_end", args=([outputs] * batches,)),
             dict(name=f"Callback.on_{fn}_epoch_end", args=(trainer, model)),
             dict(name=f"on_{fn}_epoch_end"),
         ]
@@ -546,7 +501,6 @@ def training_step(self, batch, batch_idx):
         dict(name="on_validation_end"),
         dict(name="train", args=(True,)),
         dict(name="on_validation_model_train"),
-        dict(name="training_epoch_end", args=([dict(loss=ANY)] * train_batches,)),
         dict(name="Callback.on_train_epoch_end", args=(trainer, model)),
         # `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_train_epoch_end`
         dict(name="Callback.state_dict"),
@@ -625,7 +579,6 @@ def test_trainer_model_hook_system_fit_no_val_and_resume_max_epochs(tmpdir):
         dict(name="Callback.on_train_epoch_start", args=(trainer, model)),
         dict(name="on_train_epoch_start"),
         *model._train_batch(trainer, model, 2, current_epoch=1, current_batch=0),
-        dict(name="training_epoch_end", args=([dict(loss=ANY)] * 2,)),
         dict(name="Callback.on_train_epoch_end", args=(trainer, model)),
         dict(name="Callback.state_dict"),
         dict(name="Callback.on_save_checkpoint", args=(trainer, model, saved_ckpt)),
@@ -704,7 +657,6 @@ def test_trainer_model_hook_system_fit_no_val_and_resume_max_steps(tmpdir):
         dict(name="Callback.on_train_epoch_start", args=(trainer, model)),
         dict(name="on_train_epoch_start"),
         *model._train_batch(trainer, model, steps_after_reload, current_batch=1),
-        dict(name="training_epoch_end", args=([dict(loss=ANY)] * train_batches,)),
         dict(name="Callback.on_train_epoch_end", args=(trainer, model)),
         dict(name="Callback.state_dict"),
         dict(name="Callback.on_save_checkpoint", args=(trainer, model, saved_ckpt)),
@@ -789,7 +741,6 @@ def test_trainer_model_hook_system_predict(tmpdir):
         dict(name="Callback.on_predict_epoch_start", args=(trainer, model)),
         dict(name="on_predict_epoch_start"),
         *model._predict_batch(trainer, model, batches),
-        # TODO: `predict_epoch_end`
         dict(name="Callback.on_predict_epoch_end", args=(trainer, model, [[ANY] * batches])),
         dict(name="on_predict_epoch_end", args=([[ANY] * batches],)),
         dict(name="Callback.on_predict_end", args=(trainer, model)),
@@ -835,7 +786,6 @@ def predict_dataloader(self):
             return [DataLoader(RandomDataset(32, 64)), DataLoader(RandomDataset(32, 64))]
 
     model = CustomBoringModel()
-    model.test_epoch_end = None
 
     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=5)
 
diff --git a/tests/tests_pytorch/plugins/test_double_plugin.py b/tests/tests_pytorch/plugins/test_double_plugin.py
index d5e9060a792e5..ee7b6ba795325 100644
--- a/tests/tests_pytorch/plugins/test_double_plugin.py
+++ b/tests/tests_pytorch/plugins/test_double_plugin.py
@@ -45,9 +45,8 @@ def training_step(self, batch, batch_idx):
         assert float_data.dtype == torch.float64
         return super().training_step(float_data, batch_idx)
 
-    def training_epoch_end(self, outputs) -> None:
+    def on_train_epoch_end(self):
         assert torch.tensor([0.0]).dtype == torch.float32
-        return super().training_epoch_end(outputs)
 
     def validation_step(self, batch, batch_idx):
         assert batch.dtype == torch.float64
diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py
index e864ae5c1031a..f78696fb996c2 100644
--- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py
+++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py
@@ -646,7 +646,6 @@ def test_deepspeed_multigpu_stage_3(tmpdir):
 def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config):
     """Test to ensure ZeRO Stage 3 works with a parallel model."""
     model = ModelParallelBoringModelManualOptim()
-    model.training_epoch_end = None
     trainer = Trainer(
         default_root_dir=tmpdir,
         strategy=DeepSpeedStrategy(stage=3),
diff --git a/tests/tests_pytorch/strategies/test_dp.py b/tests/tests_pytorch/strategies/test_dp.py
index ed7e2658571e2..2f1bccff58ae2 100644
--- a/tests/tests_pytorch/strategies/test_dp.py
+++ b/tests/tests_pytorch/strategies/test_dp.py
@@ -91,6 +91,12 @@ def test_multi_gpu_model_dp(tmpdir):
 
 
 class ReductionTestModel(BoringModel):
+    def __init__(self):
+        super().__init__()
+        self.train_outputs = []
+        self.val_outputs = []
+        self.tests_outputs = []
+
     def train_dataloader(self):
         return DataLoader(RandomDataset(32, 64), batch_size=2)
 
@@ -111,29 +117,32 @@ def add_outputs(self, output, device):
     def training_step(self, batch, batch_idx):
         output = super().training_step(batch, batch_idx)
         self.add_outputs(output, batch.device)
+        self.train_outputs.append(output)
         return output
 
     def validation_step(self, batch, batch_idx):
         output = super().validation_step(batch, batch_idx)
         self.add_outputs(output, batch.device)
+        self.val_outputs.append(output)
         return output
 
     def test_step(self, batch, batch_idx):
         output = super().test_step(batch, batch_idx)
         self.add_outputs(output, batch.device)
+        self.tests_outputs.append(output)
         return output
 
-    def training_epoch_end(self, outputs):
-        assert outputs[0]["loss"].shape == torch.Size([])
-        self._assert_extra_outputs(outputs)
+    def on_train_epoch_end(self):
+        assert self.train_outputs[0]["loss"].shape == torch.Size([])
+        self._assert_extra_outputs(self.train_outputs)
 
-    def validation_epoch_end(self, outputs):
-        assert outputs[0]["x"].shape == torch.Size([2])
-        self._assert_extra_outputs(outputs)
+    def on_validation_epoch_end(self):
+        assert self.val_outputs[0]["x"].shape == torch.Size([2])
+        self._assert_extra_outputs(self.val_outputs)
 
-    def test_epoch_end(self, outputs):
-        assert outputs[0]["y"].shape == torch.Size([2])
-        self._assert_extra_outputs(outputs)
+    def on_test_epoch_end(self):
+        assert self.tests_outputs[0]["y"].shape == torch.Size([2])
+        self._assert_extra_outputs(self.tests_outputs)
 
     def _assert_extra_outputs(self, outputs):
         out = outputs[0]["reduce_int"]
diff --git a/tests/tests_pytorch/trainer/connectors/test_data_connector.py b/tests/tests_pytorch/trainer/connectors/test_data_connector.py
index fb0e68c34c000..dc9070cdff9a3 100644
--- a/tests/tests_pytorch/trainer/connectors/test_data_connector.py
+++ b/tests/tests_pytorch/trainer/connectors/test_data_connector.py
@@ -90,7 +90,6 @@ def test_dataloader(self):
             return [self.create_dataset()] * self._numbers_test_dataloaders
 
     model = TestModel(2, mode)
-    model.test_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
diff --git a/tests/tests_pytorch/trainer/dynamic_args/test_multiple_eval_dataloaders.py b/tests/tests_pytorch/trainer/dynamic_args/test_multiple_eval_dataloaders.py
index bcbc72d99e91b..6e422f413880f 100644
--- a/tests/tests_pytorch/trainer/dynamic_args/test_multiple_eval_dataloaders.py
+++ b/tests/tests_pytorch/trainer/dynamic_args/test_multiple_eval_dataloaders.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import pytest
 import torch
 from torch.utils.data import Dataset
 
@@ -42,41 +43,8 @@ def __len__(self):
         return self.len
 
 
-def test_multiple_eval_dataloaders_tuple(tmpdir):
-    class TestModel(BoringModel):
-        def validation_step(self, batch, batch_idx, dataloader_idx):
-            if dataloader_idx == 0:
-                assert batch.sum() == 0
-            elif dataloader_idx == 1:
-                assert batch.sum() == 11
-            else:
-                raise Exception("should only have two dataloaders")
-
-        def training_epoch_end(self, outputs) -> None:
-            # outputs should be an array with an entry per optimizer
-            assert len(outputs) == 2
-
-        def val_dataloader(self):
-            dl1 = torch.utils.data.DataLoader(RandomDatasetA(32, 64), batch_size=11)
-            dl2 = torch.utils.data.DataLoader(RandomDatasetB(32, 64), batch_size=11)
-            return [dl1, dl2]
-
-    model = TestModel()
-    model.validation_epoch_end = None
-
-    trainer = Trainer(
-        default_root_dir=tmpdir,
-        limit_train_batches=2,
-        limit_val_batches=2,
-        max_epochs=1,
-        log_every_n_steps=1,
-        enable_model_summary=False,
-    )
-
-    trainer.fit(model)
-
-
-def test_multiple_eval_dataloaders_list(tmpdir):
+@pytest.mark.parametrize("seq_type", (tuple, list))
+def test_multiple_eval_dataloaders_seq(tmpdir, seq_type):
     class TestModel(BoringModel):
         def validation_step(self, batch, batch_idx, dataloader_idx):
             if dataloader_idx == 0:
@@ -89,10 +57,9 @@ def validation_step(self, batch, batch_idx, dataloader_idx):
         def val_dataloader(self):
             dl1 = torch.utils.data.DataLoader(RandomDatasetA(32, 64), batch_size=11)
             dl2 = torch.utils.data.DataLoader(RandomDatasetB(32, 64), batch_size=11)
-            return dl1, dl2
+            return seq_type((dl1, dl2))
 
     model = TestModel()
-    model.validation_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -126,10 +93,6 @@ def training_step(self, batch, batch_idx, optimizer_idx):
             loss = self.step(batch[0])
             return loss
 
-        def training_epoch_end(self, outputs) -> None:
-            # outputs should be an array with an entry per optimizer
-            assert len(outputs) == 2
-
         def validation_step(self, batch, batch_idx, dataloader_idx):
             if dataloader_idx == 0:
                 assert batch.sum() == 0
@@ -149,7 +112,6 @@ def configure_optimizers(self):
             return optimizer, optimizer_2
 
     model = TestModel()
-    model.validation_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
diff --git a/tests/tests_pytorch/trainer/flags/test_fast_dev_run.py b/tests/tests_pytorch/trainer/flags/test_fast_dev_run.py
index fa6ca71007e81..ea459bcd6f0d8 100644
--- a/tests/tests_pytorch/trainer/flags/test_fast_dev_run.py
+++ b/tests/tests_pytorch/trainer/flags/test_fast_dev_run.py
@@ -40,9 +40,9 @@ class FastDevRunModel(BoringModel):
         def __init__(self):
             super().__init__()
             self.training_step_call_count = 0
-            self.training_epoch_end_call_count = 0
+            self.on_train_epoch_end_call_count = 0
             self.validation_step_call_count = 0
-            self.validation_epoch_end_call_count = 0
+            self.on_validation_epoch_end_call_count = 0
             self.test_step_call_count = 0
 
         def training_step(self, batch, batch_idx):
@@ -51,17 +51,15 @@ def training_step(self, batch, batch_idx):
             self.training_step_call_count += 1
             return super().training_step(batch, batch_idx)
 
-        def training_epoch_end(self, outputs):
-            self.training_epoch_end_call_count += 1
-            super().training_epoch_end(outputs)
+        def on_train_epoch_end(self):
+            self.on_train_epoch_end_call_count += 1
 
         def validation_step(self, batch, batch_idx):
             self.validation_step_call_count += 1
             return super().validation_step(batch, batch_idx)
 
-        def validation_epoch_end(self, outputs):
-            self.validation_epoch_end_call_count += 1
-            super().validation_epoch_end(outputs)
+        def on_validation_epoch_end(self):
+            self.on_validation_epoch_end_call_count += 1
 
         def test_step(self, batch, batch_idx):
             self.test_step_call_count += 1
@@ -83,9 +81,9 @@ def test_step(self, batch, batch_idx):
     def _make_fast_dev_run_assertions(trainer, model):
         # check the call count for train/val/test step/epoch
         assert model.training_step_call_count == fast_dev_run
-        assert model.training_epoch_end_call_count == 1
+        assert model.on_train_epoch_end_call_count == 1
         assert model.validation_step_call_count == 0 if model.validation_step is None else fast_dev_run
-        assert model.validation_epoch_end_call_count == 0 if model.validation_step is None else 1
+        assert model.on_validation_epoch_end_call_count == (0 if model.validation_step is None else 1)
         assert model.test_step_call_count == fast_dev_run
 
         # check trainer arguments
diff --git a/tests/tests_pytorch/trainer/flags/test_min_max_epochs.py b/tests/tests_pytorch/trainer/flags/test_min_max_epochs.py
index f8d01b1497164..c1ba8f979887c 100644
--- a/tests/tests_pytorch/trainer/flags/test_min_max_epochs.py
+++ b/tests/tests_pytorch/trainer/flags/test_min_max_epochs.py
@@ -47,7 +47,6 @@ def training_step(self, *args, **kwargs):
     match = "`max_epochs` was not set. Setting it to 1000 epochs."
 
     model = CustomModel()
-    model.training_epoch_end = None
     trainer = Trainer(max_epochs=None, limit_train_batches=1)
     with pytest.warns(PossibleUserWarning, match=match):
         trainer.fit(model)
diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py
index ff950f7a8f679..4385b8529a029 100644
--- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py
+++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py
@@ -84,7 +84,6 @@ def test_all_rank_logging_ddp_spawn(tmpdir):
     """Check that all ranks can be logged from."""
     model = TestModel()
     all_rank_logger = AllRankLogger()
-    model.training_epoch_end = None
     trainer = Trainer(
         strategy="ddp_spawn",
         accelerator="gpu",
diff --git a/tests/tests_pytorch/trainer/logging_/test_eval_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_eval_loop_logging.py
index 32b0358c7933a..6378f3e885e83 100644
--- a/tests/tests_pytorch/trainer/logging_/test_eval_loop_logging.py
+++ b/tests/tests_pytorch/trainer/logging_/test_eval_loop_logging.py
@@ -56,7 +56,6 @@ def validation_step(self, batch, batch_idx):
 
     model = TestModel()
     model.validation_step_end = None
-    model.validation_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -79,7 +78,7 @@ def validation_step(self, batch, batch_idx):
 
 
 def test__validation_step__epoch_end__log(tmpdir):
-    """Tests that validation_epoch_end can log."""
+    """Tests that on_validation_epoch_end can log."""
 
     class TestModel(BoringModel):
         def training_step(self, batch, batch_idx):
@@ -94,7 +93,7 @@ def validation_step(self, batch, batch_idx):
             self.log("d", out["x"], on_step=True, on_epoch=True)
             return out
 
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             self.log("g", torch.tensor(2, device=self.device), on_epoch=True)
 
     model = TestModel()
@@ -124,7 +123,7 @@ def validation_epoch_end(self, outputs):
 @pytest.mark.parametrize(["batches", "log_interval", "max_epochs"], [(1, 1, 1), (64, 32, 2)])
 def test_eval_epoch_logging(tmpdir, batches, log_interval, max_epochs):
     class TestModel(BoringModel):
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             self.log("c", torch.tensor(2), on_epoch=True, prog_bar=True, logger=True)
             self.log("d/e/f", 2)
 
@@ -188,10 +187,8 @@ def validation_step(self, batch, batch_idx):
             self.log("val_loss", loss, on_epoch=True, on_step=True, prog_bar=True)
             return {"x": loss}
 
-        def validation_epoch_end(self, outputs) -> None:
-            for passed_in, manually_tracked in zip(outputs, self.val_losses):
-                assert passed_in["x"] == manually_tracked
-            self.manual_epoch_end_mean = torch.stack([x["x"] for x in outputs]).mean()
+        def on_validation_epoch_end(self) -> None:
+            self.manual_epoch_end_mean = torch.stack(self.val_losses).mean()
 
     model = TestModel()
     trainer = Trainer(
@@ -217,10 +214,10 @@ def validation_epoch_end(self, outputs) -> None:
 
 @pytest.mark.parametrize(["batches", "log_interval", "max_epochs"], [(1, 1, 1), (64, 32, 2)])
 def test_eval_epoch_only_logging(tmpdir, batches, log_interval, max_epochs):
-    """Tests that test_epoch_end can be used to log, and we return them in the results."""
+    """Tests that on_test_epoch_end can be used to log, and we return them in the results."""
 
     class TestModel(BoringModel):
-        def test_epoch_end(self, outputs):
+        def on_test_epoch_end(self):
             self.log("c", torch.tensor(2))
             self.log("d/e/f", 2)
 
@@ -255,7 +252,6 @@ def test_dataloader(self):
             return super().test_dataloader()
 
     model = TestModel()
-    model.test_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -332,7 +328,6 @@ def validation_step(self, batch, batch_idx):
             self.log("val_loss", loss)
 
     model = TestModel()
-    model.validation_epoch_end = None
     cb = TestCallback()
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -459,7 +454,6 @@ def test_dataloader(self):
             return [torch.utils.data.DataLoader(RandomDataset(32, 64)) for _ in range(num_dataloaders)]
 
     model = TestModel()
-    model.test_epoch_end = None
     cb = TestCallback()
     trainer = Trainer(
         default_root_dir=tmpdir, limit_test_batches=2, num_sanity_val_steps=0, max_epochs=2, callbacks=[cb]
@@ -533,7 +527,6 @@ def test_step(self, batch, batch_idx):
             return {"y": loss}
 
     model = ExtendedModel()
-    model.validation_epoch_end = None
 
     # Initialize a trainer
     trainer = Trainer(
@@ -596,6 +589,8 @@ def get_metrics_at_idx(idx):
 @pytest.mark.parametrize("val_check_interval", [0.5, 1.0])
 def test_multiple_dataloaders_reset(val_check_interval, tmpdir):
     class TestModel(BoringModel):
+        val_outputs = [[], []]
+
         def training_step(self, batch, batch_idx):
             out = super().training_step(batch, batch_idx)
             value = 1 + batch_idx
@@ -604,7 +599,7 @@ def training_step(self, batch, batch_idx):
             self.log("batch_idx", value, on_step=True, on_epoch=True, prog_bar=True)
             return out
 
-        def training_epoch_end(self, outputs):
+        def on_train_epoch_end(self):
             metrics = self.trainer.progress_bar_metrics
             v = 15 if self.current_epoch == 0 else 150
             assert metrics["batch_idx_epoch"] == (v / 5.0)
@@ -613,10 +608,13 @@ def validation_step(self, batch, batch_idx, dataloader_idx):
             value = (1 + batch_idx) * (1 + dataloader_idx)
             if self.current_epoch != 0:
                 value *= 10
+            self.val_outputs[dataloader_idx].append(value)
             self.log("val_loss", value, on_step=False, on_epoch=True, prog_bar=True, logger=True)
-            return value
 
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
+            outputs = self.val_outputs
+            self.val_outputs = [[], []]
+
             if self.current_epoch == 0:
                 assert sum(outputs[0]) / 5 == 3
                 assert sum(outputs[1]) / 5 == 6
@@ -658,6 +656,8 @@ def val_dataloader(self):
 )
 def test_metrics_and_outputs_device(tmpdir, accelerator):
     class TestModel(BoringModel):
+        outputs = []
+
         def on_before_backward(self, loss: Tensor) -> None:
             # the loss should be on the correct device before backward
             assert loss.device.type == accelerator
@@ -667,13 +667,13 @@ def validation_step(self, *args):
             y = x * 2
             assert x.requires_grad is True
             assert y.grad_fn is None  # disabled by validation
-
             self.log("foo", y)
+            self.outputs.append(y)
             return y
 
-        def validation_epoch_end(self, outputs):
-            # the step outputs were not moved
-            assert all(o.device == self.device for o in outputs)
+        def on_validation_epoch_end(self):
+            # the step outputs were not moved after returning them
+            assert all(o.device == self.device for o in self.outputs)
             # and the logged metrics aren't
             assert self.trainer.callback_metrics["foo"].device.type == accelerator
 
@@ -706,7 +706,6 @@ def test_dataloader(self):
             return [torch.utils.data.DataLoader(RandomDataset(32, 64)) for _ in range(num_dataloaders)]
 
     model = CustomBoringModel()
-    model.test_epoch_end = None
     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1)
     results = trainer.test(model)
 
@@ -728,12 +727,16 @@ def test_dataloader(self):
 @mock.patch("pytorch_lightning.loggers.TensorBoardLogger.log_metrics")
 def test_logging_multi_dataloader_on_epoch_end(mock_log_metrics, tmpdir):
     class CustomBoringModel(BoringModel):
+        outputs = [[], []]
+
         def test_step(self, batch, batch_idx, dataloader_idx):
-            self.log("foo", dataloader_idx + 1)
-            return dataloader_idx + 1
+            value = dataloader_idx + 1
+            self.log("foo", value)
+            self.outputs[dataloader_idx].append(value)
+            return value
 
-        def test_epoch_end(self, outputs) -> None:
-            self.log("foobar", sum(sum(o) for o in outputs))
+        def on_test_epoch_end(self):
+            self.log("foobar", sum(sum(o) for o in self.outputs))
 
         def test_dataloader(self):
             return [super().test_dataloader(), super().test_dataloader()]
@@ -742,7 +745,7 @@ def test_dataloader(self):
     trainer = Trainer(default_root_dir=tmpdir, limit_test_batches=1, logger=TensorBoardLogger(tmpdir))
     results = trainer.test(model)
 
-    # what's logged in `test_epoch_end` gets included in the results of each dataloader
+    # what's logged in `on_test_epoch_end` gets included in the results of each dataloader
     assert results == [{"foo/dataloader_idx_0": 1, "foobar": 3}, {"foo/dataloader_idx_1": 2, "foobar": 3}]
     cb_metrics = set(trainer.callback_metrics)
     assert cb_metrics == {"foo/dataloader_idx_0", "foo/dataloader_idx_1", "foobar"}
@@ -960,9 +963,6 @@ def val_dataloader(self):
         def test_dataloader(self):
             return [super().test_dataloader()] * num_dataloaders
 
-        validation_epoch_end = None
-        test_epoch_end = None
-
     limit_batches = 4
     max_epochs = 3
     trainer = Trainer(
diff --git a/tests/tests_pytorch/trainer/logging_/test_logger_connector.py b/tests/tests_pytorch/trainer/logging_/test_logger_connector.py
index 48249ed969e59..ae857345aeb4e 100644
--- a/tests/tests_pytorch/trainer/logging_/test_logger_connector.py
+++ b/tests/tests_pytorch/trainer/logging_/test_logger_connector.py
@@ -276,8 +276,7 @@ def training_step_end(self, training_step_outputs):  # required for dp
             loss = training_step_outputs["loss"].mean()
             return loss
 
-        def training_epoch_end(self, outputs):
-            assert all(out["loss"].device == root_device for out in outputs)
+        def on_train_epoch_end(self):
             assert self.trainer.callback_metrics["train_loss_epoch"].device == root_device
 
         def validation_step(self, *args, **kwargs):
@@ -285,8 +284,7 @@ def validation_step(self, *args, **kwargs):
             self.log("val_loss_epoch", val_loss, on_step=False, on_epoch=True)
             return val_loss
 
-        def validation_epoch_end(self, outputs):
-            assert all(loss.device == root_device for loss in outputs)
+        def on_validation_epoch_end(self):
             assert self.trainer.callback_metrics["val_loss_epoch"].device == root_device
 
         def test_step(self, *args, **kwargs):
@@ -294,8 +292,7 @@ def test_step(self, *args, **kwargs):
             self.log("test_loss_epoch", test_loss, on_step=False, on_epoch=True)
             return test_loss
 
-        def test_epoch_end(self, outputs):
-            assert all(loss.device == root_device for loss in outputs)
+        def on_test_epoch_end(self):
             assert self.trainer.callback_metrics["test_loss_epoch"].device == root_device
 
         def train_dataloader(self):
@@ -321,37 +318,6 @@ def test_dataloader(self):
     trainer.test(model)
 
 
-def test_can_return_tensor_with_more_than_one_element(tmpdir):
-    """Ensure {validation,test}_step return values are not included as callback metrics.
-
-    #6623
-    """
-
-    class TestModel(BoringModel):
-        def validation_step(self, batch, *args, **kwargs):
-            return {"val": torch.tensor([0, 1])}
-
-        def validation_epoch_end(self, outputs):
-            # ensure validation step returns still appear here
-            assert len(outputs) == 2
-            assert all(list(d) == ["val"] for d in outputs)  # check keys
-            assert all(torch.equal(d["val"], torch.tensor([0, 1])) for d in outputs)  # check values
-
-        def test_step(self, batch, *args, **kwargs):
-            return {"test": torch.tensor([0, 1])}
-
-        def test_epoch_end(self, outputs):
-            assert len(outputs) == 2
-            assert all(list(d) == ["test"] for d in outputs)  # check keys
-            assert all(torch.equal(d["test"], torch.tensor([0, 1])) for d in outputs)  # check values
-
-    model = TestModel()
-    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=2, enable_progress_bar=False)
-    trainer.fit(model)
-    trainer.validate(model)
-    trainer.test(model)
-
-
 @pytest.mark.parametrize("add_dataloader_idx", [False, True])
 def test_auto_add_dataloader_idx(tmpdir, add_dataloader_idx):
     """test that auto_add_dataloader_idx argument works."""
@@ -372,7 +338,6 @@ def validation_step(self, *args, **kwargs):
             return output
 
     model = TestModel()
-    model.validation_epoch_end = None
 
     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=2)
     trainer.fit(model)
diff --git a/tests/tests_pytorch/trainer/logging_/test_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_loop_logging.py
index 3251d4d2aa5ef..b12753628f014 100644
--- a/tests/tests_pytorch/trainer/logging_/test_loop_logging.py
+++ b/tests/tests_pytorch/trainer/logging_/test_loop_logging.py
@@ -70,7 +70,6 @@ def _make_assertion(model, hooks, result_mock, on_step, on_epoch, extra_kwargs):
             "on_train_start",
             "on_train_epoch_start",
             "on_train_epoch_end",
-            "training_epoch_end",
         ]
         all_logging_hooks = all_logging_hooks - set(hooks)
         _make_assertion(model, hooks, result_mock, on_step=False, on_epoch=True, extra_kwargs=extra_kwargs)
@@ -85,7 +84,6 @@ def _make_assertion(model, hooks, result_mock, on_step, on_epoch, extra_kwargs):
             "on_validation_batch_end",
             "validation_step",
             "validation_step_end",
-            "validation_epoch_end",
         ]
         all_logging_hooks = all_logging_hooks - set(hooks)
         _make_assertion(model, hooks, result_mock, on_step=False, on_epoch=True, extra_kwargs=extra_kwargs)
@@ -100,7 +98,6 @@ def _make_assertion(model, hooks, result_mock, on_step, on_epoch, extra_kwargs):
             "on_test_batch_end",
             "test_step",
             "test_step_end",
-            "test_epoch_end",
         ]
         all_logging_hooks = all_logging_hooks - set(hooks)
         _make_assertion(model, hooks, result_mock, on_step=False, on_epoch=True, extra_kwargs=extra_kwargs)
diff --git a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py
index e50b11b6ed9d4..4b7db6a272d33 100644
--- a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py
+++ b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py
@@ -103,8 +103,6 @@ def training_step(self, batch, batch_idx):
 
 
 def test__training_step__epoch_end__log(tmpdir):
-    """Tests that training_epoch_end can log."""
-
     class TestModel(BoringModel):
         def training_step(self, batch, batch_idx):
             out = super().training_step(batch, batch_idx)
@@ -113,9 +111,9 @@ def training_step(self, batch, batch_idx):
             self.log_dict({"a1": loss, "a2": loss})
             return out
 
-        def training_epoch_end(self, outputs):
-            self.log("b1", outputs[0]["loss"])
-            self.log("b", outputs[0]["loss"], on_epoch=True, prog_bar=True, logger=True)
+        def on_train_epoch_end(self):
+            self.log("b1", torch.tensor(1.0))
+            self.log("b", torch.tensor(2.0), on_epoch=True, prog_bar=True, logger=True)
 
     model = TestModel()
     model.val_dataloader = None
@@ -144,7 +142,7 @@ def training_epoch_end(self, outputs):
 
 @pytest.mark.parametrize(["batches", "log_interval", "max_epochs"], [(1, 1, 1), (64, 32, 2)])
 def test__training_step__step_end__epoch_end__log(tmpdir, batches, log_interval, max_epochs):
-    """Tests that training_step_end and training_epoch_end can log."""
+    """Tests that training_step_end and on_train_epoch_end can log."""
 
     class TestModel(BoringModel):
         def training_step(self, batch):
@@ -156,8 +154,8 @@ def training_step_end(self, out):
             self.log("b", out, on_step=True, on_epoch=True, prog_bar=True, logger=True)
             return out
 
-        def training_epoch_end(self, outputs):
-            self.log("c", outputs[0]["loss"], on_epoch=True, prog_bar=True, logger=True)
+        def on_train_epoch_end(self):
+            self.log("c", 1, on_epoch=True, prog_bar=True, logger=True)
             self.log("d/e/f", 2)
 
     model = TestModel()
@@ -722,9 +720,13 @@ def training_step(self, batch, batch_idx):
 def test_on_epoch_logging_with_sum_and_on_batch_start(tmpdir):
     class TestModel(BoringModel):
         def on_train_epoch_end(self):
+            self.log("on_train_epoch_end", 3.0, reduce_fx="mean")
+            assert self.trainer._results["on_train_epoch_end.on_train_epoch_end"].value == 3.0
             assert all(v == 3 for v in self.trainer.callback_metrics.values())
 
         def on_validation_epoch_end(self):
+            self.log("on_validation_epoch_end", 3.0, reduce_fx="mean")
+            assert self.trainer._results["on_validation_epoch_end.on_validation_epoch_end"].value == 3.0
             assert all(v == 3 for v in self.trainer.callback_metrics.values())
 
         def on_train_batch_start(self, batch, batch_idx):
@@ -739,16 +741,9 @@ def on_validation_batch_start(self, batch, batch_idx, dataloader_idx):
         def on_validation_batch_end(self, outputs, batch, batch_idx, dataloader_idx):
             self.log("on_validation_batch_end", 1.0, reduce_fx="sum")
 
-        def training_epoch_end(self, *_) -> None:
-            self.log("training_epoch_end", 3.0, reduce_fx="mean")
-            assert self.trainer._results["training_epoch_end.training_epoch_end"].value == 3.0
-
-        def validation_epoch_end(self, *_) -> None:
-            self.log("validation_epoch_end", 3.0, reduce_fx="mean")
-            assert self.trainer._results["validation_epoch_end.validation_epoch_end"].value == 3.0
-
     model = TestModel()
     trainer = Trainer(
+        default_root_dir=tmpdir,
         enable_progress_bar=False,
         limit_train_batches=3,
         limit_val_batches=3,
diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py
index 55f87e54510f3..d3258b759e86d 100644
--- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py
+++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py
@@ -74,25 +74,13 @@ def configure_optimizers(self):
 @pytest.mark.parametrize(
     "kwargs", [{}, pytest.param({"accelerator": "gpu", "devices": 1, "precision": 16}, marks=RunIf(min_cuda_gpus=1))]
 )
-def test_multiple_optimizers_manual_no_return(tmpdir, kwargs):
-    class TestModel(ManualOptModel):
-        def training_step(self, batch, batch_idx):
-            # avoid returning a value
-            super().training_step(batch, batch_idx)
-
-        def training_epoch_end(self, outputs):
-            # outputs is empty as training_step does not return
-            # and it is not automatic optimization
-            assert not outputs
-
-    model = TestModel()
-    model.val_dataloader = None
-
+def test_multiple_optimizers_manual_call_counts(tmpdir, kwargs):
+    model = ManualOptModel()
     limit_train_batches = 2
     trainer = Trainer(
         default_root_dir=tmpdir,
         limit_train_batches=limit_train_batches,
-        limit_val_batches=2,
+        limit_val_batches=0,
         max_epochs=1,
         log_every_n_steps=1,
         enable_model_summary=False,
@@ -109,58 +97,25 @@ def training_epoch_end(self, outputs):
     with mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward) as bwd_mock:
         trainer.fit(model)
     assert bwd_mock.call_count == limit_train_batches * 3
+    assert trainer.global_step == limit_train_batches * 2
 
     if kwargs.get("precision") == 16:
         scaler_step_patch.stop()
         assert scaler_step.call_count == len(model.optimizers()) * limit_train_batches
 
 
-def test_multiple_optimizers_manual_return(tmpdir):
-    class TestModel(ManualOptModel):
-        def training_step(self, batch, batch_idx):
-            super().training_step(batch, batch_idx)
-            return {"something": "else"}
-
-        def training_epoch_end(self, outputs) -> None:
-            # outputs should be an array with an entry per optimizer
-            assert outputs == [{"something": "else"}, {"something": "else"}]
-
-    model = TestModel()
-    model.val_dataloader = None
-
-    limit_train_batches = 2
-    trainer = Trainer(
-        default_root_dir=tmpdir,
-        limit_train_batches=limit_train_batches,
-        limit_val_batches=2,
-        max_epochs=1,
-        log_every_n_steps=1,
-        enable_model_summary=False,
-    )
-
-    with mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward) as bwd_mock:
-        trainer.fit(model)
-    assert bwd_mock.call_count == limit_train_batches * 3
-    assert trainer.global_step == limit_train_batches * 2
-
-
 def test_multiple_optimizers_manual_log(tmpdir):
     class TestModel(ManualOptModel):
         def training_step(self, batch, batch_idx):
             loss_2 = super().training_step(batch, batch_idx)
             self.log("a", loss_2, on_epoch=True)
 
-        def training_epoch_end(self, outputs) -> None:
-            assert not outputs
-
     model = TestModel()
-    model.val_dataloader = None
-
     limit_train_batches = 2
     trainer = Trainer(
         default_root_dir=tmpdir,
         limit_train_batches=limit_train_batches,
-        limit_val_batches=2,
+        limit_val_batches=0,
         max_epochs=1,
         log_every_n_steps=1,
         enable_model_summary=False,
@@ -262,7 +217,6 @@ def test_manual_optimization_and_return_tensor(tmpdir):
 
     model = ManualOptimizationExtendedModel()
     model.training_step_end = None
-    model.training_epoch_end = None
 
     trainer = Trainer(
         max_epochs=1,
@@ -348,7 +302,6 @@ def on_train_epoch_end(self, *_, **__):
 
     model = ExtendedModel()
     model.training_step_end = None
-    model.training_epoch_end = None
 
     trainer = Trainer(
         max_epochs=1,
@@ -396,10 +349,6 @@ def training_step(self, batch, batch_idx):
 
             return {"loss1": loss_1.detach(), "loss2": loss_2.detach()}
 
-        def training_epoch_end(self, outputs) -> None:
-            # outputs should be an array with an entry per optimizer
-            assert len(outputs) == 2
-
         # sister test: tests/plugins/test_amp_plugins.py::test_amp_gradient_unscale
         def on_after_backward(self) -> None:
             # check grads are scaled
@@ -498,7 +447,6 @@ def optimizer_closure():
             assert not torch.equal(weight_before, weight_after)
 
     model = TestModel()
-    model.training_epoch_end = None
 
     limit_train_batches = 2
     trainer = Trainer(
@@ -540,8 +488,6 @@ def optimizer_closure():
             assert not torch.equal(weight_before, weight_after)
 
     model = TestModel()
-    model.training_epoch_end = None
-
     limit_train_batches = 4
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -615,7 +561,6 @@ def configure_optimizers(self):
 
     model = TestModel()
     model.val_dataloader = None
-    model.training_epoch_end = None
 
     limit_train_batches = 8
     trainer = Trainer(
@@ -727,8 +672,6 @@ def train_manual_optimization(tmpdir, strategy, model_cls=TesManualOptimizationD
     model = model_cls()
     model_copy = deepcopy(model)
     model.val_dataloader = None
-    model.training_epoch_end = None
-
     limit_train_batches = 8
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -847,7 +790,6 @@ def configure_optimizers(self):
             return [optimizer_1, optimizer_2], [self.scheduler_1, self.scheduler_2]
 
     model = TestModel()
-    model.training_epoch_end = None
 
     trainer = Trainer(
         default_root_dir=tmpdir, max_epochs=1, limit_train_batches=1, limit_val_batches=1, limit_test_batches=1
@@ -864,14 +806,9 @@ def __init__(self, scheduler_as_dict):
             self.scheduler_as_dict = scheduler_as_dict
             self.automatic_optimization = False
 
-        def training_step(self, batch, batch_idx):
-            return {"train_loss": torch.tensor([0.0])}
-
-        def training_epoch_end(self, outputs):
+        def on_train_epoch_end(self):
             scheduler = self.lr_schedulers()
-
-            loss = torch.stack([x["train_loss"] for x in outputs]).mean()
-            scheduler.step(loss)
+            scheduler.step(torch.tensor(0.0))
 
         def configure_optimizers(self):
             optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
@@ -905,7 +842,6 @@ def test_lr_scheduler_step_not_called(tmpdir):
     """Test `lr_scheduler.step()` is not called in manual optimization."""
     model = ManualOptimBoringModel()
     model.training_step_end = None
-    model.training_epoch_end = None
 
     trainer = Trainer(max_epochs=1, default_root_dir=tmpdir, fast_dev_run=2)
 
@@ -961,7 +897,6 @@ def configure_optimizers(self):
             return optimizer, optimizer_2
 
     model = TestModel()
-    model.training_epoch_end = None
     model.val_dataloader = None
 
     trainer = Trainer(
diff --git a/tests/tests_pytorch/trainer/optimization/test_multiple_optimizers.py b/tests/tests_pytorch/trainer/optimization/test_multiple_optimizers.py
index 9c306fe8d2d74..553f3ff341699 100644
--- a/tests/tests_pytorch/trainer/optimization/test_multiple_optimizers.py
+++ b/tests/tests_pytorch/trainer/optimization/test_multiple_optimizers.py
@@ -41,7 +41,6 @@ def training_step(self, batch, batch_idx, optimizer_idx):
             return out
 
     model = TestModel()
-    model.training_epoch_end = None
 
     # Initialize a trainer
     trainer = pl.Trainer(
@@ -64,10 +63,6 @@ def training_step(self, batch, batch_idx, optimizer_idx):
             self.seen[optimizer_idx] = True
             return super().training_step(batch, batch_idx)
 
-        def training_epoch_end(self, outputs) -> None:
-            # outputs should be an array with an entry per optimizer
-            assert len(outputs) == 2
-
     model = TestModel()
     model.val_dataloader = None
 
@@ -108,11 +103,6 @@ def training_step(self, batch, batch_idx):
             opt_b.step()
             opt_b.zero_grad()
 
-        def training_epoch_end(self, outputs) -> None:
-            # outputs is empty as training_step does not return
-            # and it is not automatic optimization
-            assert len(outputs) == 0
-
     model = TestModel()
     model.val_dataloader = None
 
@@ -162,11 +152,6 @@ def training_step(self, batch, batch_idx, optimizer_idx):
             loss = torch.nn.functional.mse_loss(x, torch.ones_like(x))
             return loss
 
-        def training_epoch_end(self, outputs) -> None:
-            # outputs should be an array of batches with an entry per optimizer
-            assert len(outputs) == limit_train_batches
-            assert all(len(o) == 2 for o in outputs)
-
         def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, optimizer_closure, **_):
             # update first optimizer every step
             if optimizer_idx == 0:
diff --git a/tests/tests_pytorch/trainer/optimization/test_optimizers.py b/tests/tests_pytorch/trainer/optimization/test_optimizers.py
index c16d331800c61..bd5a4fb745b79 100644
--- a/tests/tests_pytorch/trainer/optimization/test_optimizers.py
+++ b/tests/tests_pytorch/trainer/optimization/test_optimizers.py
@@ -77,7 +77,6 @@ def configure_optimizers(self):
 
     model = Model()
     model.automatic_optimization = False
-    model.training_epoch_end = None
     trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, limit_val_batches=0.1, limit_train_batches=0.2)
     trainer.fit(model)
 
@@ -309,9 +308,6 @@ class DummyModel(BoringModel):
         def training_step(self, batch, batch_idx, optimizer_idx):
             return super().training_step(batch, batch_idx)
 
-        def training_epoch_end(self, outputs) -> None:
-            pass
-
         def configure_optimizers(self):
             optimizer1 = optim.Adam(self.parameters(), lr=0.01)
             optimizer2 = optim.Adam(self.parameters(), lr=0.01)
@@ -391,7 +387,6 @@ def configure_optimizers(self):
             return a, b
 
     model = TestModel()
-    model.training_epoch_end = None
     trainer = Trainer(
         callbacks=[CB()],
         default_root_dir=tmpdir,
@@ -687,7 +682,6 @@ def on_save_checkpoint(self, checkpoint):
             self.on_save_checkpoint_called = True
 
     model = TestModel()
-    model.training_epoch_end = None
     trainer.fit(model)
     assert model.on_save_checkpoint_called
 
@@ -724,7 +718,6 @@ def configure_optimizers(self):
             return [optimizer], [lr_scheduler1, lr_scheduler2]
 
     model = CustomBoringModel()
-    model.training_epoch_end = None
     max_epochs = 3
     limit_train_batches = 2
     trainer = Trainer(
diff --git a/tests/tests_pytorch/trainer/test_config_validator.py b/tests/tests_pytorch/trainer/test_config_validator.py
index ea60d7985e6ad..589ec6139b5bc 100644
--- a/tests/tests_pytorch/trainer/test_config_validator.py
+++ b/tests/tests_pytorch/trainer/test_config_validator.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from unittest.mock import Mock
+
 import pytest
 import torch
 
@@ -18,6 +20,10 @@
 from lightning_fabric.utilities.warnings import PossibleUserWarning
 from pytorch_lightning import LightningDataModule, LightningModule, Trainer
 from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset
+from pytorch_lightning.trainer.configuration_validator import (
+    __verify_eval_loop_configuration,
+    __verify_train_val_loop_configuration,
+)
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests_pytorch.conftest import mock_cuda_count
 
@@ -158,3 +164,28 @@ def custom_method(self, batch, *_, **__):
 
     with pytest.raises(MisconfigurationException, match=match_pattern):
         trainer.fit(model)
+
+
+def test_legacy_epoch_end_hooks():
+    class TrainingEpochEndModel(BoringModel):
+        def training_epoch_end(self, outputs):
+            pass
+
+    class ValidationEpochEndModel(BoringModel):
+        def validation_epoch_end(self, outputs):
+            pass
+
+    trainer = Mock()
+    with pytest.raises(NotImplementedError, match="training_epoch_end` has been removed in v2.0"):
+        __verify_train_val_loop_configuration(trainer, TrainingEpochEndModel())
+    with pytest.raises(NotImplementedError, match="validation_epoch_end` has been removed in v2.0"):
+        __verify_train_val_loop_configuration(trainer, ValidationEpochEndModel())
+
+    class TestEpochEndModel(BoringModel):
+        def test_epoch_end(self, outputs):
+            pass
+
+    with pytest.raises(NotImplementedError, match="validation_epoch_end` has been removed in v2.0"):
+        __verify_eval_loop_configuration(ValidationEpochEndModel(), "val")
+    with pytest.raises(NotImplementedError, match="test_epoch_end` has been removed in v2.0"):
+        __verify_eval_loop_configuration(TestEpochEndModel(), "test")
diff --git a/tests/tests_pytorch/trainer/test_dataloaders.py b/tests/tests_pytorch/trainer/test_dataloaders.py
index c61c54d1001e2..1766155702f37 100644
--- a/tests/tests_pytorch/trainer/test_dataloaders.py
+++ b/tests/tests_pytorch/trainer/test_dataloaders.py
@@ -47,9 +47,6 @@ def val_dataloader(self):
     def validation_step(self, batch, batch_idx, dataloader_idx):
         return super().validation_step(batch, batch_idx)
 
-    def validation_epoch_end(self, *args, **kwargs):
-        pass
-
 
 class MultiTestDataLoaderBoringModel(BoringModel):
     def test_dataloader(self):
@@ -58,9 +55,6 @@ def test_dataloader(self):
     def test_step(self, batch, batch_idx, dataloader_idx):
         return super().test_step(batch, batch_idx)
 
-    def test_epoch_end(self, *args, **kwargs):
-        pass
-
 
 class MultiEvalDataLoaderModel(MultiValDataLoaderBoringModel, MultiTestDataLoaderBoringModel):
     pass
@@ -75,10 +69,8 @@ def test_fit_train_loader_only(tmpdir):
     model.test_dataloader = None
 
     model.validation_step = None
-    model.validation_epoch_end = None
 
     model.test_step = None
-    model.test_epoch_end = None
 
     trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
     trainer.fit(model, train_dataloaders=train_dataloader)
@@ -94,7 +86,6 @@ def test_fit_val_loader_only(tmpdir):
     model.test_dataloader = None
 
     model.test_step = None
-    model.test_epoch_end = None
 
     trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
     trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)
@@ -208,7 +199,7 @@ def training_step(self, batch, batch_idx):
         self.log("loss", self.global_step)
         return super().training_step(batch, batch_idx)
 
-    def validation_epoch_end(self, outputs):
+    def on_validation_epoch_end(self):
         self.log("val_log", self.current_epoch)
 
 
@@ -657,7 +648,7 @@ def __init__(self):
     def training_step(self, batch, batch_idx):
         self.batches_seen.append(batch)
 
-    def training_epoch_end(self, outputs):
+    def on_train_epoch_end(self):
         world_size = 2
         num_samples = NumpyRandomDataset.size
         all_batches = torch.cat(self.batches_seen)
@@ -1050,9 +1041,8 @@ def val_dataloader(self):
             val_reload_epochs.append(self.current_epoch)
             return super().val_dataloader()
 
-        def validation_epoch_end(self, outputs):
+        def on_validation_epoch_end(self):
             val_check_epochs.append(self.current_epoch)
-            return super().validation_epoch_end(outputs)
 
     model = TestModel()
 
@@ -1274,17 +1264,6 @@ def predict(self, batch, batch_idx, dataloader_idx):
             self.assert_dataloader_idx_hook(dataloader_idx)
             return super().predict(batch, batch_idx, dataloader_idx)
 
-        def assert_epoch_end_outputs(self, outputs, mode):
-            assert len(outputs) == 2
-            assert all(f"{mode}_loss_0" in x for x in outputs[0])
-            assert all(f"{mode}_loss_1" in x for x in outputs[1])
-
-        def validation_epoch_end(self, outputs):
-            self.assert_epoch_end_outputs(outputs, mode="val")
-
-        def test_epoch_end(self, outputs):
-            self.assert_epoch_end_outputs(outputs, mode="test")
-
         def train_dataloader(self):
             return {"a": DataLoader(RandomDataset(32, 64)), "b": DataLoader(RandomDataset(32, 64))}
 
diff --git a/tests/tests_pytorch/trainer/test_trainer.py b/tests/tests_pytorch/trainer/test_trainer.py
index ae4e6268ce29d..eb6069808df80 100644
--- a/tests/tests_pytorch/trainer/test_trainer.py
+++ b/tests/tests_pytorch/trainer/test_trainer.py
@@ -808,7 +808,6 @@ def predict_step(self, batch, *_):
             return self(batch)
 
     model = TestModel()
-    model.test_epoch_end = None
     trainer = Trainer(
         max_epochs=2,
         limit_val_batches=1,
@@ -879,7 +878,6 @@ def predict_step(self, batch, *_):
             return self(batch)
 
     model = TestModel()
-    model.test_epoch_end = None
     trainer = Trainer(
         max_epochs=2,
         limit_val_batches=1,
@@ -930,16 +928,11 @@ def test_disabled_training(tmpdir):
     class CurrentModel(BoringModel):
 
         training_step_invoked = False
-        training_epoch_end_invoked = False
 
         def training_step(self, *args, **kwargs):
             self.training_step_invoked = True
             return super().training_step(*args, **kwargs)
 
-        def training_epoch_end(self, *args, **kwargs):
-            self.training_epoch_end_invoked = True
-            return super().training_epoch_end(*args, **kwargs)
-
     model = CurrentModel()
 
     trainer_options = dict(
@@ -965,7 +958,6 @@ def training_epoch_end(self, *args, **kwargs):
     assert trainer.state.finished, f"Training failed with {trainer.state}"
     assert trainer.current_epoch == 0
     assert not model.training_step_invoked, "`training_step` should not run when `limit_train_batches=0`"
-    assert not model.training_epoch_end_invoked, "`training_epoch_end` should not run when `limit_train_batches=0`"
 
     # check that limit_train_batches has no influence when fast_dev_run is turned on
     model = CurrentModel()
@@ -983,7 +975,6 @@ def training_epoch_end(self, *args, **kwargs):
     assert trainer.state.finished, f"Training failed with {trainer.state}"
     assert trainer.current_epoch == 1
     assert model.training_step_invoked, "did not run `training_step` with `fast_dev_run=True`"
-    assert model.training_epoch_end_invoked, "did not run `training_epoch_end` with `fast_dev_run=True`"
 
 
 def test_disabled_validation(tmpdir):
@@ -992,16 +983,11 @@ def test_disabled_validation(tmpdir):
     class CurrentModel(BoringModel):
 
         validation_step_invoked = False
-        validation_epoch_end_invoked = False
 
         def validation_step(self, *args, **kwargs):
             self.validation_step_invoked = True
             return super().validation_step(*args, **kwargs)
 
-        def validation_epoch_end(self, *args, **kwargs):
-            self.validation_epoch_end_invoked = True
-            return super().validation_epoch_end(*args, **kwargs)
-
     model = CurrentModel()
 
     trainer_options = dict(
@@ -1020,7 +1006,6 @@ def validation_epoch_end(self, *args, **kwargs):
     assert trainer.state.finished, f"Training failed with {trainer.state}"
     assert trainer.current_epoch == 2
     assert not model.validation_step_invoked, "`validation_step` should not run when `limit_val_batches=0`"
-    assert not model.validation_epoch_end_invoked, "`validation_epoch_end` should not run when `limit_val_batches=0`"
 
     # check that limit_val_batches has no influence when fast_dev_run is turned on
     model = CurrentModel()
@@ -1031,7 +1016,6 @@ def validation_epoch_end(self, *args, **kwargs):
     assert trainer.state.finished, f"Training failed with {trainer.state}"
     assert trainer.current_epoch == 1
     assert model.validation_step_invoked, "did not run `validation_step` with `fast_dev_run=True`"
-    assert model.validation_epoch_end_invoked, "did not run `validation_epoch_end` with `fast_dev_run=True`"
 
 
 @pytest.mark.parametrize("track_grad_norm", [0, torch.tensor(1), "nan"])
@@ -1166,7 +1150,6 @@ def val_dataloader(self):
             return [DataLoader(RandomDataset(32, 64)), DataLoader(RandomDataset(32, 64))]
 
     model = CustomModel()
-    model.validation_epoch_end = None
     num_sanity_val_steps = 4
 
     trainer = Trainer(
@@ -1182,7 +1165,6 @@ def val_dataloader(self):
             return [DataLoader(RandomDataset(32, 64), batch_size=8), DataLoader(RandomDataset(32, 64))]
 
     model = CustomModelMixedVal()
-    model.validation_epoch_end = None
 
     with patch.object(
         trainer.fit_loop.epoch_loop.val_loop.epoch_loop,
@@ -1208,7 +1190,6 @@ def val_dataloader(self):
             return [DataLoader(RandomDataset(32, 64)), DataLoader(RandomDataset(32, 64))]
 
     model = CustomModel()
-    model.validation_epoch_end = None
     trainer = Trainer(
         default_root_dir=tmpdir, num_sanity_val_steps=-1, limit_val_batches=limit_val_batches, max_steps=1
     )
@@ -1725,9 +1706,6 @@ def validation_step(self, batch, batch_idx):
         loss = self.step(batch)
         self.log("x", loss)
 
-    def validation_epoch_end(self, outputs) -> None:
-        pass
-
 
 @RunIf(skip_windows=True)
 def test_fit_test_synchronization(tmpdir):
diff --git a/tests/tests_pytorch/tuner/test_scale_batch_size.py b/tests/tests_pytorch/tuner/test_scale_batch_size.py
index 2dec52cd8cf8c..aeba26e3aca6b 100644
--- a/tests/tests_pytorch/tuner/test_scale_batch_size.py
+++ b/tests/tests_pytorch/tuner/test_scale_batch_size.py
@@ -459,7 +459,6 @@ def train_dataloader(self):
 
     model = CustomBatchSizeModel(batch_size=16)
     model.validation_step = None
-    model.training_epoch_end = None
     scale_batch_size_kwargs = {"max_trials": 10, "steps_per_trial": 1, "init_val": 500, "mode": scale_method}
 
     trainer = Trainer(default_root_dir=tmpdir, max_epochs=2)
diff --git a/tests/tests_pytorch/utilities/test_all_gather_grad.py b/tests/tests_pytorch/utilities/test_all_gather_grad.py
index 8b8139a9b9c3b..e442e53e339a2 100644
--- a/tests/tests_pytorch/utilities/test_all_gather_grad.py
+++ b/tests/tests_pytorch/utilities/test_all_gather_grad.py
@@ -55,20 +55,19 @@ def test_all_gather_ddp_spawn():
 def test_all_gather_collection(tmpdir):
     class TestModel(BoringModel):
 
-        training_epoch_end_called = False
+        on_train_epoch_end_called = False
 
-        def training_epoch_end(self, outputs) -> None:
-            losses = torch.stack([x["loss"] for x in outputs])
+        def on_train_epoch_end(self):
+            losses = torch.rand(2, 2).t()
             gathered_loss = self.all_gather(
                 {
-                    "losses_tensor_int": torch.rand(2, 2).int().t(),
-                    "losses_tensor_float": torch.rand(2, 2).t(),
+                    "losses_tensor_int": losses.int(),
+                    "losses_tensor_float": losses,
+                    "losses_tensor_list": [losses, losses],
                     "losses_np_ndarray": np.array([1, 2, 3]),
                     "losses_bool": [True, False],
                     "losses_float": [0.0, 1.0, 2.0],
                     "losses_int": [0, 1, 2],
-                    "losses": losses,
-                    "losses_list": [losses, losses],
                 }
             )
             assert gathered_loss["losses_tensor_int"][0].dtype == torch.int32
@@ -80,7 +79,7 @@ def training_epoch_end(self, outputs) -> None:
             assert gathered_loss["losses_int"][0].dtype == torch.int
             assert gathered_loss["losses_list"][0].numel() == 2 * len(losses)
             assert gathered_loss["losses"].numel() == 2 * len(losses)
-            self.training_epoch_end_called = True
+            self.on_train_epoch_end_called = True
 
     seed_everything(42)
 
@@ -102,7 +101,7 @@ def training_epoch_end(self, outputs) -> None:
     )
 
     trainer.fit(model)
-    assert model.training_epoch_end_called
+    assert model.on_train_epoch_end_called
 
 
 @RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
diff --git a/tests/tests_pytorch/utilities/test_auto_restart.py b/tests/tests_pytorch/utilities/test_auto_restart.py
index 5cea629c6ebe7..fd4f912dc8dc3 100644
--- a/tests/tests_pytorch/utilities/test_auto_restart.py
+++ b/tests/tests_pytorch/utilities/test_auto_restart.py
@@ -55,11 +55,11 @@ def validation_step(self, batch, batch_idx):
             self._signal()
         return super().validation_step(batch, batch_idx)
 
-    def training_epoch_end(self, outputs) -> None:
+    def on_train_epoch_end(self):
         if not self.failure_on_step and self.failure_on_training:
             self._signal()
 
-    def validation_epoch_end(self, outputs) -> None:
+    def on_validation_epoch_end(self):
         if not self.failure_on_step and not self.failure_on_training:
             self._signal()
 
@@ -127,7 +127,7 @@ def test_auto_restart_under_signal(on_last_batch, val_check_interval, failure_on
         if val_check_interval == 1.0:
             status = "_FitLoop:on_advance_end"
         else:
-            # `training_epoch_end` happens after `validation_epoch_end` since Lightning v1.4
+            # `on_train_epoch_end` happens after `on_validation_epoch_end` since Lightning v1.4
             status = "_FitLoop:on_advance_end" if failure_on_training else "_TrainingEpochLoop:on_advance_end"
 
     _fit_model(tmpdir, True, val_check_interval, failure_on_step, failure_on_training, on_last_batch, status=status)
diff --git a/tests/tests_pytorch/utilities/test_fetching.py b/tests/tests_pytorch/utilities/test_fetching.py
index e82305bae5cfc..74d8fd9d199db 100644
--- a/tests/tests_pytorch/utilities/test_fetching.py
+++ b/tests/tests_pytorch/utilities/test_fetching.py
@@ -244,7 +244,7 @@ def training_step(self, dataloader_iter, batch_idx):
                 loss.backward()
                 opt.step()
 
-        def training_epoch_end(self, *_):
+        def on_train_epoch_end(self):
             assert self.trainer.fit_loop.epoch_loop.batch_progress.current.ready == 33
             assert self.trainer.fit_loop._data_fetcher.fetched == 64
             assert self.count == 64
@@ -456,8 +456,6 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0):
         def val_dataloader(self):
             return [super().val_dataloader(), super().val_dataloader()]
 
-        validation_epoch_end = None
-
     model = MyModel()
     fast_dev_run = 2
     trainer = Trainer(