From c06b7be7cefabb283c9123291add3d680e5163c5 Mon Sep 17 00:00:00 2001
From: Magdy Saleh <magdy@predibase.com>
Date: Tue, 26 Jul 2022 19:36:27 +0200
Subject: [PATCH] Restore master version of test_dask_known_divisions data setup

---
 tests/integration_tests/test_preprocessing.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/integration_tests/test_preprocessing.py b/tests/integration_tests/test_preprocessing.py
index e88285801a9..dd119377f2d 100644
--- a/tests/integration_tests/test_preprocessing.py
+++ b/tests/integration_tests/test_preprocessing.py
@@ -121,12 +121,14 @@ def test_with_split(backend, csv_filename, tmpdir):
 def test_dask_known_divisions(feature_fn, csv_filename, tmpdir):
     import dask.dataframe as dd
 
+    num_examples = NUM_EXAMPLES
+
     input_features = [feature_fn(os.path.join(tmpdir, "generated_output"))]
     output_features = [category_feature(vocab_size=5, reduce_input="sum")]
-
-    # num_examples=100 and npartitions=2 to ensure the test is not flaky, by having non-empty post-split datasets.
-    data_csv = generate_data(input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=100)
-    data_df = dd.from_pandas(pd.read_csv(data_csv), npartitions=2)
+    data_csv = generate_data(
+        input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=num_examples
+    )
+    data_df = dd.from_pandas(pd.read_csv(data_csv), npartitions=10)
     assert data_df.known_divisions
 
     config = {