From c06b7be7cefabb283c9123291add3d680e5163c5 Mon Sep 17 00:00:00 2001 From: Magdy Saleh Date: Tue, 26 Jul 2022 19:36:27 +0200 Subject: [PATCH] bring back master for tests --- tests/integration_tests/test_preprocessing.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/integration_tests/test_preprocessing.py b/tests/integration_tests/test_preprocessing.py index e88285801a9..dd119377f2d 100644 --- a/tests/integration_tests/test_preprocessing.py +++ b/tests/integration_tests/test_preprocessing.py @@ -121,12 +121,14 @@ def test_with_split(backend, csv_filename, tmpdir): def test_dask_known_divisions(feature_fn, csv_filename, tmpdir): import dask.dataframe as dd + num_examples = NUM_EXAMPLES + input_features = [feature_fn(os.path.join(tmpdir, "generated_output"))] output_features = [category_feature(vocab_size=5, reduce_input="sum")] - - # num_examples=100 and npartitions=2 to ensure the test is not flaky, by having non-empty post-split datasets. - data_csv = generate_data(input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=100) - data_df = dd.from_pandas(pd.read_csv(data_csv), npartitions=2) + data_csv = generate_data( + input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=num_examples + ) + data_df = dd.from_pandas(pd.read_csv(data_csv), npartitions=10) assert data_df.known_divisions config = {