ludwig-ai · arnavgarg1 · Sep 29, 2022 · Sep 28, 2022 · Sep 29, 2022
diff --git a/ludwig/utils/visualization_utils.py b/ludwig/utils/visualization_utils.py
@@ -21,6 +21,7 @@
 
 import numpy as np
 import pandas as pd
+import ptitprince as pt
 from packaging import version
 
 from ludwig.constants import SPACE, TRAINING, VALIDATION
@@ -1369,7 +1370,27 @@ def hyperopt_float_plot(hyperopt_results_df, hp_name, metric, title, filename, l
 def hyperopt_category_plot(hyperopt_results_df, hp_name, metric, title, filename, log_scale=True):
     sns.set_style("whitegrid")
     plt.figure()
-    seaborn_figure = sns.violinplot(x=hp_name, y=metric, data=hyperopt_results_df, fit_reg=False)
+
+    # The "cloud" part of a Raincloud plot needs at least 2 trials per parameter value; with only 1 trial it
+    # is drawn as an awkward looking "flat cloud" (the "rain" part is ok with 1 trial). If any parameter value
+    # has fewer than 2 trials, fall back to a stripplot, which is a categorical scatter plot.
+    parameter_to_trial_count = hyperopt_results_df[hp_name].value_counts()
+    parameter_to_trial_count = parameter_to_trial_count[parameter_to_trial_count < 2]
+
+    if len(parameter_to_trial_count) != 0:
+        seaborn_figure = sns.stripplot(x=hp_name, y=metric, data=hyperopt_results_df, size=5)
+    else:
+        seaborn_figure = pt.RainCloud(
+            x=hp_name,
+            y=metric,
+            data=hyperopt_results_df,
+            palette="Set2",
+            bw=0.2,
+            width_viol=0.7,
+            point_size=6,
+            cut=1,
+        )
+
     seaborn_figure.set_title(title)
     seaborn_figure.set(ylabel=metric)
     sns.despine()
@@ -1387,6 +1408,10 @@ def hyperopt_pair_plot(hyperopt_results_df, metric, title, filename):
     params.remove(metric)
     num_param = len(params)
 
+    # Pair plot is empty if there's only 1 parameter, so skip creating a pair plot
+    if num_param == 1:
+        return
+
     sns.set_style("white")
     fig = plt.figure(figsize=(20, 20))
     fig.suptitle(title)

diff --git a/requirements_viz.txt b/requirements_viz.txt
@@ -1,4 +1,5 @@
 matplotlib>=3.4; python_version > '3.6'
 matplotlib>=3.0,<3.4; python_version <= '3.6'
-seaborn>=0.7
+seaborn>=0.7,<0.12
 hiplot
+ptitprince
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -56,28 +56,37 @@ def yaml_filename():
 
 
 @pytest.fixture(scope="module")
-def hyperopt_results():
-    """This function generates hyperopt results."""
-    input_features = [
-        text_feature(name="utterance", encoder={"cell_type": "lstm", "reduce_output": "sum"}),
-        category_feature(encoder={"vocab_size": 2}, reduce_input="sum"),
-    ]
-
-    output_features = [category_feature(decoder={"vocab_size": 2}, reduce_input="sum")]
-
-    csv_filename = uuid.uuid4().hex[:10].upper() + ".csv"
-    rel_path = generate_data(input_features, output_features, csv_filename)
-
-    config = {
-        INPUT_FEATURES: input_features,
-        OUTPUT_FEATURES: output_features,
-        COMBINER: {TYPE: "concat", "num_fc_layers": 2},
-        TRAINER: {EPOCHS: 2, "learning_rate": 0.001},
+def hyperopt_results_single_parameter():
+    config, rel_path = _get_sample_config()
+    config[HYPEROPT] = {
+        "parameters": {
+            "trainer.learning_rate": {
+                "space": "loguniform",
+                "lower": 0.0001,
+                "upper": 0.01,
+            }
+        },
+        "goal": "minimize",
+        "output_feature": config[OUTPUT_FEATURES][0][NAME],
+        "validation_metrics": "loss",
+        "executor": {
+            "type": "ray",
+            "num_samples": 2,
+        },
+        "search_alg": {
+            "type": "variant_generator",
+        },
     }
+    # Pass resume=False: resuming from an earlier failed run causes failures in other tests
+    hyperopt(config, dataset=rel_path, output_directory="results", experiment_name="hyperopt_test", resume=False)
+    return os.path.join(os.path.abspath("results"), "hyperopt_test")
 
-    output_feature_name = output_features[0][NAME]
 
-    hyperopt_configs = {
+@pytest.fixture(scope="module")
+def hyperopt_results_multiple_parameters():
+    config, rel_path = _get_sample_config()
+    output_feature_name = config[OUTPUT_FEATURES][0][NAME]
+    config[HYPEROPT] = {
         "parameters": {
             "trainer.learning_rate": {
                 "space": "loguniform",
@@ -98,12 +107,8 @@ def hyperopt_results():
             "type": "variant_generator",
         },
     }
-
-    # add hyperopt parameter space to the config
-    config[HYPEROPT] = hyperopt_configs
-
-    hyperopt(config, dataset=rel_path, output_directory="results", experiment_name="hyperopt_test")
-
+    # Pass resume=False: resuming from an earlier failed run causes failures in other tests
+    hyperopt(config, dataset=rel_path, output_directory="results", experiment_name="hyperopt_test", resume=False)
     return os.path.join(os.path.abspath("results"), "hyperopt_test")
 
 
@@ -167,3 +172,21 @@ def _get_default_system_config():
         "object_store_full_delay_ms": 100,
     }
     return system_config
+
+
+def _get_sample_config():
+    """Returns a sample config and the path of the dataset generated for it."""
+    input_features = [
+        text_feature(name="utterance", encoder={"cell_type": "lstm", "reduce_output": "sum"}),
+        category_feature(encoder={"vocab_size": 2}, reduce_input="sum"),
+    ]
+    output_features = [category_feature(decoder={"vocab_size": 2}, reduce_input="sum")]
+    csv_filename = uuid.uuid4().hex[:10].upper() + ".csv"
+    rel_path = generate_data(input_features, output_features, csv_filename)
+    config = {
+        INPUT_FEATURES: input_features,
+        OUTPUT_FEATURES: output_features,
+        COMBINER: {TYPE: "concat", "num_fc_layers": 2},
+        TRAINER: {EPOCHS: 2, "learning_rate": 0.001},
+    }
+    return config, rel_path
diff --git a/tests/integration_tests/test_visualization_api.py b/tests/integration_tests/test_visualization_api.py
@@ -844,10 +844,17 @@ def test_frequency_vs_f1_vis_api(experiment_to_use):
 
 
 @pytest.mark.distributed
-def test_hyperopt_report_vis_api(hyperopt_results, tmpdir):
+def test_hyperopt_report_vis_api(hyperopt_results_multiple_parameters, tmpdir):
     vis_dir = os.path.join(tmpdir, "visualizations")
 
-    visualize.hyperopt_report(os.path.join(hyperopt_results, HYPEROPT_STATISTICS_FILE_NAME), output_directory=vis_dir)
+    # Ensure visualizations directory is empty before creating plots
+    if os.path.exists(vis_dir):
+        for f in os.listdir(vis_dir):
+            os.remove(os.path.join(vis_dir, f))
+
+    visualize.hyperopt_report(
+        os.path.join(hyperopt_results_multiple_parameters, HYPEROPT_STATISTICS_FILE_NAME), output_directory=vis_dir
+    )
 
     # test for creation of output directory
     assert os.path.isdir(vis_dir)
@@ -857,13 +864,39 @@ def test_hyperopt_report_vis_api(hyperopt_results, tmpdir):
 
 
 @pytest.mark.distributed
-def test_hyperopt_hiplot_vis_api(hyperopt_results, tmpdir):
+def test_hyperopt_hiplot_vis_api(hyperopt_results_multiple_parameters, tmpdir):
     vis_dir = os.path.join(tmpdir, "visualizations")
 
-    visualize.hyperopt_hiplot(os.path.join(hyperopt_results, HYPEROPT_STATISTICS_FILE_NAME), output_directory=vis_dir)
+    # Ensure visualizations directory is empty before creating plots
+    if os.path.exists(vis_dir):
+        for f in os.listdir(vis_dir):
+            os.remove(os.path.join(vis_dir, f))
+
+    visualize.hyperopt_hiplot(
+        os.path.join(hyperopt_results_multiple_parameters, HYPEROPT_STATISTICS_FILE_NAME), output_directory=vis_dir
+    )
 
     # test for creation of output directory
     assert os.path.isdir(vis_dir)
 
     # test for generatated html page
     assert os.path.isfile(os.path.join(vis_dir, "hyperopt_hiplot.html"))
+
+
+@pytest.mark.distributed
+def test_hyperopt_report_vis_api_no_pairplot(hyperopt_results_single_parameter, tmpdir):
+    vis_dir = os.path.join(tmpdir, "visualizations")
+
+    # Ensure visualizations directory is empty before creating plots
+    if os.path.exists(vis_dir):
+        for f in os.listdir(vis_dir):
+            os.remove(os.path.join(vis_dir, f))
+
+    visualize.hyperopt_report(
+        os.path.join(hyperopt_results_single_parameter, HYPEROPT_STATISTICS_FILE_NAME), output_directory=vis_dir
+    )
+
+    figure_cnt = glob.glob(os.path.join(vis_dir, "*"))
+
+    # Expect exactly one figure: the plot for the single parameter, with pair plot creation skipped
+    assert len(figure_cnt) == 1