diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 610f232304..4dc44f113f 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -88,6 +88,7 @@ dependencies:
 - pydantic
 - pylibcudf=24.10
 - pylint=3.0.3
+- pynvml=11.4
 - pypdf=3.17.4
 - pypdfium2=4.30
 - pytest-asyncio
diff --git a/conda/environments/dev_cuda-125_arch-x86_64.yaml b/conda/environments/dev_cuda-125_arch-x86_64.yaml
index af599fb7de..f27becb108 100644
--- a/conda/environments/dev_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/dev_cuda-125_arch-x86_64.yaml
@@ -73,6 +73,7 @@ dependencies:
 - pydantic
 - pylibcudf=24.10
 - pylint=3.0.3
+- pynvml=11.4
 - pypdfium2=4.30
 - pytest-asyncio
 - pytest-benchmark=4.0
diff --git a/dependencies.yaml b/dependencies.yaml
index 538faea3a5..62c15bc9c4 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -326,6 +326,7 @@ dependencies:
           - include-what-you-use=0.20
           - isort
           - pylint=3.0.3
+          - pynvml=11.4
           - vale=3.7
           - vale-styles-microsoft
           - vale-styles-write-good
diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md
index c1fa094416..6259c79e65 100644
--- a/tests/benchmarks/README.md
+++ b/tests/benchmarks/README.md
@@ -78,7 +78,6 @@ Fetch input data for benchmarks:
 ./scripts/fetch_data.py fetch validation
 ```
 
-
 ### Run E2E Benchmarks
 
 Benchmarks are run using `pytest-benchmark`. By default, there are five rounds of measurement. For each round, there will be one iteration of each workflow. Measurements are taken for each round. Final results such as `min`, `max` and `mean` times will be based on these measurements.
@@ -124,7 +123,6 @@ The `test_bench_e2e_pipelines.py` script contains several benchmarks within it.
 - `test_sid_nlp_e2e`
 - `test_abp_fil_e2e`
 - `test_phishing_nlp_e2e`
-- `test_cloudtrail_ae_e2e`
 
 For example, to run E2E benchmarks on the SID NLP workflow:
 ```bash
@@ -138,11 +136,10 @@ pytest -s --run_benchmark --benchmark-enable --benchmark-warmup=on --benchmark-w
 The console output should look like this:
 ```
---------------------------------------------------------------------------------- benchmark: 4 tests --------------------------------------------------------------------------------
+--------------------------------------------------------------------------------- benchmark: 3 tests --------------------------------------------------------------------------------
 Name (time in s)            Min               Max              Mean            StdDev            Median               IQR            Outliers     OPS            Rounds  Iterations
 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 test_sid_nlp_e2e         1.8907 (1.0)      1.9817 (1.0)      1.9400 (1.0)      0.0325 (2.12)     1.9438 (1.0)      0.0297 (1.21)          2;0  0.5155 (1.0)           5           1
-test_cloudtrail_ae_e2e   3.3403 (1.77)     3.3769 (1.70)     3.3626 (1.73)     0.0153 (1.0)      3.3668 (1.73)     0.0245 (1.0)           1;0  0.2974 (0.58)          5           1
 test_abp_fil_e2e         5.1271 (2.71)     5.3044 (2.68)     5.2083 (2.68)     0.0856 (5.59)     5.1862 (2.67)     0.1653 (6.75)          1;0  0.1920 (0.37)          5           1
 test_phishing_nlp_e2e    5.6629 (3.00)     6.0987 (3.08)     5.8835 (3.03)     0.1697 (11.08)    5.8988 (3.03)     0.2584 (10.55)         2;0  0.1700 (0.33)          5           1
 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/tests/benchmarks/e2e_test_configs.json b/tests/benchmarks/e2e_test_configs.json
index 83449a5517..4d7a134255 100644
--- a/tests/benchmarks/e2e_test_configs.json
+++ b/tests/benchmarks/e2e_test_configs.json
@@ -1,5 +1,5 @@
 {
-    "triton_server_url": "localhost:8001",
+    "triton_server_url": "localhost:8000",
     "test_sid_nlp_e2e": {
         "file_path": "../../models/datasets/validation-data/sid-validation-data.csv",
         "repeat": 10,
diff --git a/tests/benchmarks/test_bench_completion_pipeline.py b/tests/benchmarks/test_bench_completion_pipeline.py
index c45f3ecd9c..27287635bd 100644
--- a/tests/benchmarks/test_bench_completion_pipeline.py
+++ b/tests/benchmarks/test_bench_completion_pipeline.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 import collections.abc
+import os
 import typing
 
 import pytest
@@ -82,4 +83,6 @@ def test_completion_pipe(benchmark: collections.abc.Callable[[collections.abc.Ca
                          config: Config,
                          dataset: DatasetManager,
                          llm_service_cls: type[LLMService]):
+    if llm_service_cls == OpenAIChatService:
+        os.environ.update({"OPENAI_API_KEY": "test_api_key"})
     benchmark(_run_pipeline, config, llm_service_cls, source_df=dataset["countries.csv"])
diff --git a/tests/benchmarks/test_bench_e2e_pipelines.py b/tests/benchmarks/test_bench_e2e_pipelines.py
index b9f6880d3e..14c1d2884f 100644
--- a/tests/benchmarks/test_bench_e2e_pipelines.py
+++ b/tests/benchmarks/test_bench_e2e_pipelines.py
@@ -35,7 +35,7 @@ from morpheus.stages.preprocess.preprocess_fil_stage import PreprocessFILStage
 from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage
 from morpheus.utils.file_utils import load_labels_file
-from morpheus.utils.logger import configure_logging
+from morpheus.utils.logger import set_log_level
 
 E2E_CONFIG_FILE = os.path.join(TEST_DIRS.morpheus_root, "tests/benchmarks/e2e_test_configs.json")
 with open(E2E_CONFIG_FILE, 'r', encoding='UTF-8') as f:
@@ -44,7 +44,7 @@
 def nlp_pipeline(config: Config, input_file, repeat, vocab_hash_file, output_file, model_name):
-    configure_logging(log_level=logging.INFO)
+    set_log_level(log_level=logging.DEBUG)
 
     pipeline = LinearPipeline(config)
     pipeline.set_source(FileSourceStage(config, filename=input_file, repeat=repeat))
@@ -71,7 +71,7 @@ def nlp_pipeline(config: Config, input_file, repeat, vocab_hash_file, output_fil
 
 def fil_pipeline(config: Config, input_file, repeat, output_file, model_name):
-    configure_logging(log_level=logging.INFO)
+    set_log_level(log_level=logging.DEBUG)
 
     pipeline = LinearPipeline(config)
     pipeline.set_source(FileSourceStage(config, filename=input_file, repeat=repeat))
diff --git a/tests/benchmarks/test_bench_rag_standalone_pipeline.py b/tests/benchmarks/test_bench_rag_standalone_pipeline.py
index e394eaa331..5a0d64d47e 100644
--- a/tests/benchmarks/test_bench_rag_standalone_pipeline.py
+++ b/tests/benchmarks/test_bench_rag_standalone_pipeline.py
@@ -135,10 +135,12 @@ def test_rag_standalone_pipe(benchmark: collections.abc.Callable[[collections.ab
                              repeat_count: int,
                              import_mod: types.ModuleType,
                              llm_service_name: str):
+    if llm_service_name == "openai":
+        os.environ.update({"OPENAI_API_KEY": "test_api_key"})
     collection_name = f"test_bench_rag_standalone_pipe_{llm_service_name}"
     populate_milvus(milvus_server_uri=milvus_server_uri,
                     collection_name=collection_name,
-                    resource_kwargs=import_mod.build_milvus_config(embedding_size=EMBEDDING_SIZE),
+                    resource_kwargs=import_mod.build_default_milvus_config(embedding_size=EMBEDDING_SIZE),
                     df=dataset["service/milvus_rss_data.json"],
                     overwrite=True)
diff --git a/tests/benchmarks/test_bench_vdb_upload_pipeline.py b/tests/benchmarks/test_bench_vdb_upload_pipeline.py
index 51ae9842a1..e0e74f60e5 100644
--- a/tests/benchmarks/test_bench_vdb_upload_pipeline.py
+++ b/tests/benchmarks/test_bench_vdb_upload_pipeline.py
@@ -79,7 +79,7 @@ def _run_pipeline(config: Config,
     pipe.add_stage(
         WriteToVectorDBStage(config,
                              resource_name=collection_name,
-                             resource_kwargs=utils_mod.build_milvus_config(embedding_size=EMBEDDING_SIZE),
+                             resource_kwargs=utils_mod.build_default_milvus_config(embedding_size=EMBEDDING_SIZE),
                              recreate=True,
                              service="milvus",
                              uri=milvus_server_uri))
@@ -92,7 +92,7 @@ def _run_pipeline(config: Config,
 @pytest.mark.benchmark
 @pytest.mark.import_mod([
     os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py'),
-    os.path.join(TEST_DIRS.examples_dir, 'llm/common/web_scraper_stage.py'),
+    os.path.join(TEST_DIRS.examples_dir, 'llm/vdb_upload/module/web_scraper_stage.py'),
 ])
 @mock.patch('feedparser.http.get')
 @mock.patch('requests.Session')
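
A minimal usage sketch for the updated E2E benchmarks, assuming the `pytest-benchmark` options documented in `tests/benchmarks/README.md` and a Triton server reachable at `localhost:8000` as now set in `e2e_test_configs.json`:

```bash
# Sketch only: run the SID NLP E2E benchmark from the repository root.
# --run_benchmark is the Morpheus-specific opt-in flag shown in the README;
# by default there are five rounds of measurement with one iteration each.
pytest -s --run_benchmark --benchmark-enable --benchmark-warmup=on \
    tests/benchmarks/test_bench_e2e_pipelines.py::test_sid_nlp_e2e
```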