From 0d2bfba63a44e85ad1fbd5345ceb8513b2a29fe1 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Fri, 18 Oct 2024 17:36:49 -0400 Subject: [PATCH 01/17] benchmark updates/fixes --- .../dev_cuda-125_arch-x86_64.yaml | 1 + .../examples_cuda-125_arch-x86_64.yaml | 2 +- .../benchmarks/test_bench_e2e_dfp_pipeline.py | 6 +-- tests/benchmarks/README.md | 34 +++++------------ tests/benchmarks/e2e_test_configs.json | 2 +- .../test_bench_completion_pipeline.py | 3 ++ tests/benchmarks/test_bench_e2e_pipelines.py | 37 +++---------------- .../test_bench_rag_standalone_pipeline.py | 4 +- .../test_bench_vdb_upload_pipeline.py | 4 +- 9 files changed, 29 insertions(+), 64 deletions(-) diff --git a/conda/environments/dev_cuda-125_arch-x86_64.yaml b/conda/environments/dev_cuda-125_arch-x86_64.yaml index af599fb7de..8122c9c64e 100644 --- a/conda/environments/dev_cuda-125_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-125_arch-x86_64.yaml @@ -73,6 +73,7 @@ dependencies: - pydantic - pylibcudf=24.10 - pylint=3.0.3 +- pynvml=11.4.1 - pypdfium2=4.30 - pytest-asyncio - pytest-benchmark=4.0 diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml index ffcae28e4a..b97f031d91 100644 --- a/conda/environments/examples_cuda-125_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml @@ -54,7 +54,6 @@ dependencies: - requests-toolbelt=1.0 - s3fs - scikit-learn=1.3.2 -- sentence-transformers=2.7 - sqlalchemy<2.0 - tqdm=4 - transformers=4.36.2 @@ -77,5 +76,6 @@ dependencies: - milvus==2.3.5 - nemollm==0.3.5 - pymilvus==2.3.6 + - sentence-transformers==2.7 - torch==2.4.0+cu124 name: examples_cuda-125_arch-x86_64 diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py index 051e3b7f25..6376349bc7 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py @@ -40,7 +40,7 @@ from morpheus.stages.postprocess.serialize_stage import SerializeStage from morpheus.utils.column_info import DataFrameInputSchema from morpheus.utils.file_utils import date_extractor -from morpheus.utils.logger import configure_logging +from morpheus.utils.logger import set_log_level from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage @@ -105,7 +105,7 @@ def dfp_training_pipeline_stages(pipe_config: Config, filenames: typing.List[str], reuse_cache=False): - configure_logging(log_level=logger.level) + set_log_level(log_level=logger.level) pipeline = LinearPipeline(pipe_config) pipeline.set_source(MultiFileSource(pipe_config, filenames=filenames)) @@ -157,7 +157,7 @@ def dfp_inference_pipeline_stages(pipe_config: Config, output_filepath: str, reuse_cache=False): - configure_logging(log_level=logger.level) + set_log_level(log_level=logger.level) pipeline = LinearPipeline(pipe_config) pipeline.set_source(MultiFileSource(pipe_config, filenames=filenames)) diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index 148dbb3d44..b5d0bf826a 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -48,32 +48,20 @@ Once Triton server finishes starting up, it will display the status of all loade ### Set up Morpheus Dev Container -If you don't already have the Morpheus Dev container, run the following to build it: -```bash -./docker/build_container_dev.sh -``` - -Now run the container: -```bash -./docker/run_container_dev.sh -``` +Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-docker-container) to set up your development environment in either a Docker container or Conda environment. -Note that Morpheus containers are tagged by date. By default, `run_container_dev.sh` will try to use current date as tag. Therefore, if you are trying to run a container that was not built on the current date, you must set the `DOCKER_IMAGE_TAG` environment variable. For example, -```bash -DOCKER_IMAGE_TAG=dev-221003 ./docker/run_container_dev.sh -``` +##### Install the required dependencies -In the `/workspace` directory of the container, run the following to compile Morpheus: +Run the following in the `/workspace` directory of your dev container: ```bash -./scripts/compile.sh +mamba env update \ + -n ${CONDA_DEFAULT_ENV} \ + --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml ``` -Now install Morpheus: -```bash -pip install -e /workspace -``` +##### Fetch input data for benchmarks -Fetch input data for benchmarks: +Run the following in the `/workspace` directory of your dev container: ```bash ./scripts/fetch_data.py fetch validation ``` @@ -124,7 +112,6 @@ The `test_bench_e2e_pipelines.py` script contains several benchmarks within it. - `test_sid_nlp_e2e` - `test_abp_fil_e2e` - `test_phishing_nlp_e2e` -- `test_cloudtrail_ae_e2e` For example, to run E2E benchmarks on the SID NLP workflow: ```bash @@ -138,11 +125,10 @@ pytest -s --run_benchmark --benchmark-enable --benchmark-warmup=on --benchmark-w The console output should look like this: ``` ---------------------------------------------------------------------------------- benchmark: 4 tests -------------------------------------------------------------------------------- +--------------------------------------------------------------------------------- benchmark: 3 tests -------------------------------------------------------------------------------- Name (time in s) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- test_sid_nlp_e2e 1.8907 (1.0) 1.9817 (1.0) 1.9400 (1.0) 0.0325 (2.12) 1.9438 (1.0) 0.0297 (1.21) 2;0 0.5155 (1.0) 5 1 -test_cloudtrail_ae_e2e 3.3403 (1.77) 3.3769 (1.70) 3.3626 (1.73) 0.0153 (1.0) 3.3668 (1.73) 0.0245 (1.0) 1;0 0.2974 (0.58) 5 1 test_abp_fil_e2e 5.1271 (2.71) 5.3044 (2.68) 5.2083 (2.68) 0.0856 (5.59) 5.1862 (2.67) 0.1653 (6.75) 1;0 0.1920 (0.37) 5 1 test_phishing_nlp_e2e 5.6629 (3.00) 6.0987 (3.08) 5.8835 (3.03) 0.1697 (11.08) 5.8988 (3.03) 0.2584 (10.55) 2;0 0.1700 (0.33) 5 1 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -193,7 +179,7 @@ Additional benchmark stats for each workflow: ### Production DFP E2E Benchmarks -Note that the `test_cloudtrail_ae_e2e` benchmarks measure performance of a pipeline built using [Starter DFP](../../examples/digital_fingerprinting/starter/README.md) stages. Separate benchmark tests are also provided to measure performance of the example [Production DFP](../../examples/digital_fingerprinting/production/README.md) pipelines. More information about running those benchmarks can be found [here](../../examples/digital_fingerprinting/production/morpheus/benchmarks/README.md). +Separate benchmark tests are provided to measure performance of the example [Production DFP](../../examples/digital_fingerprinting/production/README.md) pipelines. More information about running those benchmarks can be found [here](../../examples/digital_fingerprinting/production/morpheus/benchmarks/README.md). You can use the same Dev container created here to run the Production DFP benchmarks. You would just need to install additional dependencies as follows: diff --git a/tests/benchmarks/e2e_test_configs.json b/tests/benchmarks/e2e_test_configs.json index eae85c1deb..cda83d3af5 100644 --- a/tests/benchmarks/e2e_test_configs.json +++ b/tests/benchmarks/e2e_test_configs.json @@ -1,5 +1,5 @@ { - "triton_server_url": "localhost:8001", + "triton_server_url": "localhost:8000", "test_sid_nlp_e2e": { "file_path": "../../models/datasets/validation-data/sid-validation-data.csv", "repeat": 10, diff --git a/tests/benchmarks/test_bench_completion_pipeline.py b/tests/benchmarks/test_bench_completion_pipeline.py index 20f921d228..9ec4d99616 100644 --- a/tests/benchmarks/test_bench_completion_pipeline.py +++ b/tests/benchmarks/test_bench_completion_pipeline.py @@ -14,6 +14,7 @@ # limitations under the License. import collections.abc +import os import typing import pytest @@ -82,4 +83,6 @@ def test_completion_pipe(benchmark: collections.abc.Callable[[collections.abc.Ca config: Config, dataset: DatasetManager, llm_service_cls: type[LLMService]): + if llm_service_cls == OpenAIChatService: + os.environ.update({"OPENAI_API_KEY": "test_api_key"}) benchmark(_run_pipeline, config, llm_service_cls, source_df=dataset["countries.csv"]) diff --git a/tests/benchmarks/test_bench_e2e_pipelines.py b/tests/benchmarks/test_bench_e2e_pipelines.py index e99e7bbc07..b12d07a2de 100644 --- a/tests/benchmarks/test_bench_e2e_pipelines.py +++ b/tests/benchmarks/test_bench_e2e_pipelines.py @@ -21,7 +21,6 @@ from _utils import TEST_DIRS from morpheus.config import Config -from morpheus.config import ConfigAutoEncoder from morpheus.config import ConfigFIL from morpheus.config import CppConfig from morpheus.config import PipelineModes @@ -41,7 +40,7 @@ from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage from morpheus.stages.preprocess.train_ae_stage import TrainAEStage from morpheus.utils.file_utils import load_labels_file -from morpheus.utils.logger import configure_logging +from morpheus.utils.logger import set_log_level E2E_CONFIG_FILE = os.path.join(TEST_DIRS.morpheus_root, "tests/benchmarks/e2e_test_configs.json") with open(E2E_CONFIG_FILE, 'r', encoding='UTF-8') as f: @@ -50,7 +49,7 @@ def nlp_pipeline(config: Config, input_file, repeat, vocab_hash_file, output_file, model_name): - configure_logging(log_level=logging.INFO) + set_log_level(log_level=logging.DEBUG) pipeline = LinearPipeline(config) pipeline.set_source(FileSourceStage(config, filename=input_file, repeat=repeat)) @@ -77,7 +76,7 @@ def nlp_pipeline(config: Config, input_file, repeat, vocab_hash_file, output_fil def fil_pipeline(config: Config, input_file, repeat, output_file, model_name): - configure_logging(log_level=logging.INFO) + set_log_level(log_level=logging.DEBUG) pipeline = LinearPipeline(config) pipeline.set_source(FileSourceStage(config, filename=input_file, repeat=repeat)) @@ -99,7 +98,8 @@ def fil_pipeline(config: Config, input_file, repeat, output_file, model_name): def ae_pipeline(config: Config, input_glob, repeat, train_data_glob, output_file): - configure_logging(log_level=logging.INFO) + set_log_level(log_level=logging.DEBUG) + pipeline = LinearPipeline(config) pipeline.set_source(CloudTrailSourceStage(config, input_glob=input_glob, max_files=200, repeat=repeat)) pipeline.add_stage( @@ -196,30 +196,3 @@ def test_phishing_nlp_e2e(benchmark, tmp_path): model_name = "phishing-bert-onnx" benchmark(nlp_pipeline, config, input_filepath, repeat, vocab_filepath, output_filepath, model_name) - - -@pytest.mark.benchmark -def test_cloudtrail_ae_e2e(benchmark, tmp_path): - - config = Config() - config.mode = PipelineModes.AE - config.num_threads = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["num_threads"] - config.pipeline_batch_size = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["pipeline_batch_size"] - config.model_max_batch_size = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["model_max_batch_size"] - config.feature_length = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["feature_length"] - config.edge_buffer_size = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["edge_buffer_size"] - config.class_labels = ["reconstruct_loss", "zscore"] - - config.ae = ConfigAutoEncoder() - config.ae.userid_column_name = "userIdentityaccountId" - config.ae.userid_filter = "Account-123456789" - ae_cols_filepath = os.path.join(TEST_DIRS.data_dir, 'columns_ae_cloudtrail.txt') - config.ae.feature_columns = load_labels_file(ae_cols_filepath) - CppConfig.set_should_use_cpp(False) - - input_glob = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["input_glob_path"] - repeat = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["repeat"] - train_glob = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["train_glob_path"] - output_filepath = os.path.join(tmp_path, "cloudtrail_ae_e2e_output.csv") - - benchmark(ae_pipeline, config, input_glob, repeat, train_glob, output_filepath) diff --git a/tests/benchmarks/test_bench_rag_standalone_pipeline.py b/tests/benchmarks/test_bench_rag_standalone_pipeline.py index 8f531326a8..1b8a7b65f3 100644 --- a/tests/benchmarks/test_bench_rag_standalone_pipeline.py +++ b/tests/benchmarks/test_bench_rag_standalone_pipeline.py @@ -135,10 +135,12 @@ def test_rag_standalone_pipe(benchmark: collections.abc.Callable[[collections.ab repeat_count: int, import_mod: types.ModuleType, llm_service_name: str): + if llm_service_name=="openai": + os.environ.update({"OPENAI_API_KEY": "test_api_key"}) collection_name = f"test_bench_rag_standalone_pipe_{llm_service_name}" populate_milvus(milvus_server_uri=milvus_server_uri, collection_name=collection_name, - resource_kwargs=import_mod.build_milvus_config(embedding_size=EMBEDDING_SIZE), + resource_kwargs=import_mod.build_default_milvus_config(embedding_size=EMBEDDING_SIZE), df=dataset["service/milvus_rss_data.json"], overwrite=True) diff --git a/tests/benchmarks/test_bench_vdb_upload_pipeline.py b/tests/benchmarks/test_bench_vdb_upload_pipeline.py index f7864fb779..90e3321164 100644 --- a/tests/benchmarks/test_bench_vdb_upload_pipeline.py +++ b/tests/benchmarks/test_bench_vdb_upload_pipeline.py @@ -79,7 +79,7 @@ def _run_pipeline(config: Config, pipe.add_stage( WriteToVectorDBStage(config, resource_name=collection_name, - resource_kwargs=utils_mod.build_milvus_config(embedding_size=EMBEDDING_SIZE), + resource_kwargs=utils_mod.build_default_milvus_config(embedding_size=EMBEDDING_SIZE), recreate=True, service="milvus", uri=milvus_server_uri)) @@ -92,7 +92,7 @@ def _run_pipeline(config: Config, @pytest.mark.benchmark @pytest.mark.import_mod([ os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py'), - os.path.join(TEST_DIRS.examples_dir, 'llm/common/web_scraper_stage.py'), + os.path.join(TEST_DIRS.examples_dir, 'llm/vdb_upload/module/web_scraper_stage.py'), ]) @mock.patch('feedparser.http.get') @mock.patch('requests.Session') From 1226484de1d509c754b90d8028cd6f911b8f1efc Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Fri, 18 Oct 2024 20:09:22 -0400 Subject: [PATCH 02/17] flake8 fix --- tests/benchmarks/test_bench_rag_standalone_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/benchmarks/test_bench_rag_standalone_pipeline.py b/tests/benchmarks/test_bench_rag_standalone_pipeline.py index 1b8a7b65f3..e9b35c5e81 100644 --- a/tests/benchmarks/test_bench_rag_standalone_pipeline.py +++ b/tests/benchmarks/test_bench_rag_standalone_pipeline.py @@ -135,7 +135,7 @@ def test_rag_standalone_pipe(benchmark: collections.abc.Callable[[collections.ab repeat_count: int, import_mod: types.ModuleType, llm_service_name: str): - if llm_service_name=="openai": + if llm_service_name == "openai": os.environ.update({"OPENAI_API_KEY": "test_api_key"}) collection_name = f"test_bench_rag_standalone_pipe_{llm_service_name}" populate_milvus(milvus_server_uri=milvus_server_uri, From 9ea5d9b38ba88e1cfb90bb104669685c484d25c1 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Fri, 18 Oct 2024 20:57:48 -0400 Subject: [PATCH 03/17] update dependencies.yaml --- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 0281cf6b68..ac9b3c0d84 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -107,7 +107,6 @@ dependencies: - s3fs - scikit-build=0.17.6 - scikit-learn=1.3.2 -- sentence-transformers=2.7 - sphinx - sphinx_rtd_theme - sqlalchemy<2.0 @@ -140,5 +139,6 @@ dependencies: - nemollm==0.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 + - sentence-transformers==2.7 - torch==2.4.0+cu124 name: all_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 95809bb0ee..5cfb680724 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -459,7 +459,6 @@ dependencies: - *pypdfium2 - *python-docx - requests-toolbelt=1.0 # Transitive dep needed by nemollm, specified here to ensure we get a compatible version - - sentence-transformers=2.7 - pip - pip: - langchain==0.1.16 @@ -467,6 +466,7 @@ dependencies: - faiss-cpu - google-search-results==2.4 - nemollm==0.3.5 + - sentence-transformers==2.7 model-training-tuning: common: From c72d6ab6c5ecd7bb93fd3eef2b0cc50aa98d4ed1 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Fri, 18 Oct 2024 21:17:02 -0400 Subject: [PATCH 04/17] add pynvml to dependencies.yaml --- conda/environments/all_cuda-125_arch-x86_64.yaml | 1 + dependencies.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index ac9b3c0d84..8ee3f11575 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -88,6 +88,7 @@ dependencies: - pydantic - pylibcudf=24.10 - pylint=3.0.3 +- pynvml=11.4.1 - pypdf=3.17.4 - pypdfium2=4.30 - pytest-asyncio diff --git a/dependencies.yaml b/dependencies.yaml index 7c1dc626fe..be64c7cc82 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -311,6 +311,7 @@ dependencies: - include-what-you-use=0.20 - isort - pylint=3.0.3 + - pynvml=11.4.1 - vale=3.7 - vale-styles-microsoft - vale-styles-write-good From 3b4fc836b3f4b52dac0e203ac6b74a549f696fab Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Fri, 18 Oct 2024 21:30:36 -0400 Subject: [PATCH 05/17] readme update --- tests/benchmarks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index b5d0bf826a..78619a169f 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -48,7 +48,7 @@ Once Triton server finishes starting up, it will display the status of all loade ### Set up Morpheus Dev Container -Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-docker-container) to set up your development environment in either a Docker container or Conda environment. +Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-docker-container) to build your Morpheus dev container. ##### Install the required dependencies From 73f25a9e6f5f0e1b7411e6d7fafa95712ddae0f2 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Fri, 18 Oct 2024 22:29:49 -0400 Subject: [PATCH 06/17] Remove remnant e2e autencoder benchmark code --- tests/benchmarks/test_bench_e2e_pipelines.py | 28 -------------------- 1 file changed, 28 deletions(-) diff --git a/tests/benchmarks/test_bench_e2e_pipelines.py b/tests/benchmarks/test_bench_e2e_pipelines.py index b12d07a2de..14c1d2884f 100644 --- a/tests/benchmarks/test_bench_e2e_pipelines.py +++ b/tests/benchmarks/test_bench_e2e_pipelines.py @@ -26,19 +26,14 @@ from morpheus.config import PipelineModes from morpheus.pipeline.linear_pipeline import LinearPipeline from morpheus.stages.general.monitor_stage import MonitorStage -from morpheus.stages.inference.auto_encoder_inference_stage import AutoEncoderInferenceStage from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage -from morpheus.stages.input.cloud_trail_source_stage import CloudTrailSourceStage from morpheus.stages.input.file_source_stage import FileSourceStage from morpheus.stages.output.write_to_file_stage import WriteToFileStage from morpheus.stages.postprocess.add_classifications_stage import AddClassificationsStage -from morpheus.stages.postprocess.add_scores_stage import AddScoresStage from morpheus.stages.postprocess.serialize_stage import SerializeStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage -from morpheus.stages.preprocess.preprocess_ae_stage import PreprocessAEStage from morpheus.stages.preprocess.preprocess_fil_stage import PreprocessFILStage from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage -from morpheus.stages.preprocess.train_ae_stage import TrainAEStage from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import set_log_level @@ -96,29 +91,6 @@ def fil_pipeline(config: Config, input_file, repeat, output_file, model_name): pipeline.run() -def ae_pipeline(config: Config, input_glob, repeat, train_data_glob, output_file): - - set_log_level(log_level=logging.DEBUG) - - pipeline = LinearPipeline(config) - pipeline.set_source(CloudTrailSourceStage(config, input_glob=input_glob, max_files=200, repeat=repeat)) - pipeline.add_stage( - TrainAEStage(config, - train_data_glob=train_data_glob, - source_stage_class="morpheus.stages.input.cloud_trail_source_stage.CloudTrailSourceStage", - seed=42, - sort_glob=True)) - pipeline.add_stage(PreprocessAEStage(config)) - pipeline.add_stage(AutoEncoderInferenceStage(config)) - pipeline.add_stage(AddScoresStage(config)) - pipeline.add_stage(MonitorStage(config, log_level=logging.INFO)) - pipeline.add_stage(SerializeStage(config)) - pipeline.add_stage(WriteToFileStage(config, filename=output_file, overwrite=True)) - - pipeline.build() - pipeline.run() - - @pytest.mark.benchmark def test_sid_nlp_e2e(benchmark, tmp_path): From 1871a63e55e1e30a6e5036351be7007cfd8e85a7 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Sat, 19 Oct 2024 10:53:31 -0400 Subject: [PATCH 07/17] readme update --- tests/benchmarks/README.md | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index 78619a169f..67fed4b1eb 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -46,24 +46,14 @@ Once Triton server finishes starting up, it will display the status of all loade +--------------------+---------+--------+ ``` -### Set up Morpheus Dev Container +### Build Morpheus conda environment -Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-docker-container) to build your Morpheus dev container. +Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-a-conda-environment) to build your Morpheus conda environment. -##### Install the required dependencies - -Run the following in the `/workspace` directory of your dev container: -```bash -mamba env update \ - -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml -``` - -##### Fetch input data for benchmarks - -Run the following in the `/workspace` directory of your dev container: +In step 3 of the instructions, choose the command to install all dependencies. ```bash -./scripts/fetch_data.py fetch validation +conda env create --solver=libmamba -n morpheus --file conda/environments/all_cuda-125_arch-x86_64.yaml +conda activate morpheus ``` From d56b5d25046294dbb1acb7b375606859c7797d18 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Sat, 19 Oct 2024 11:09:42 -0400 Subject: [PATCH 08/17] readme update --- tests/benchmarks/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index 67fed4b1eb..0deab37b43 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -46,9 +46,9 @@ Once Triton server finishes starting up, it will display the status of all loade +--------------------+---------+--------+ ``` -### Build Morpheus conda environment +### Build Morpheus Conda environment -Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-a-conda-environment) to build your Morpheus conda environment. +Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-a-conda-environment) to build your Morpheus Conda environment. In step 3 of the instructions, choose the command to install all dependencies. ```bash From 5c0df093b34d55fb04d3ba1867571a4e7d8aaa4d Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Sat, 19 Oct 2024 11:10:33 -0400 Subject: [PATCH 09/17] change pynvml version to 11.4 --- dependencies.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index be64c7cc82..76518251c5 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -311,7 +311,7 @@ dependencies: - include-what-you-use=0.20 - isort - pylint=3.0.3 - - pynvml=11.4.1 + - pynvml=11.4 - vale=3.7 - vale-styles-microsoft - vale-styles-write-good From 1be192cd4d344e1787cfb90824785f27581f51dd Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Sat, 19 Oct 2024 11:28:01 -0400 Subject: [PATCH 10/17] regenerate updated env yamls --- conda/environments/all_cuda-125_arch-x86_64.yaml | 3 ++- conda/environments/dev_cuda-125_arch-x86_64.yaml | 3 ++- conda/environments/examples_cuda-125_arch-x86_64.yaml | 1 + conda/environments/model-utils_cuda-125_arch-x86_64.yaml | 1 + conda/environments/runtime_cuda-125_arch-x86_64.yaml | 1 + 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 8ee3f11575..7afd5778d3 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -88,7 +88,7 @@ dependencies: - pydantic - pylibcudf=24.10 - pylint=3.0.3 -- pynvml=11.4.1 +- pynvml=11.4 - pypdf=3.17.4 - pypdfium2=4.30 - pytest-asyncio @@ -143,3 +143,4 @@ dependencies: - sentence-transformers==2.7 - torch==2.4.0+cu124 name: all_cuda-125_arch-x86_64 + diff --git a/conda/environments/dev_cuda-125_arch-x86_64.yaml b/conda/environments/dev_cuda-125_arch-x86_64.yaml index 8122c9c64e..f18b9e4e68 100644 --- a/conda/environments/dev_cuda-125_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-125_arch-x86_64.yaml @@ -73,7 +73,7 @@ dependencies: - pydantic - pylibcudf=24.10 - pylint=3.0.3 -- pynvml=11.4.1 +- pynvml=11.4 - pypdfium2=4.30 - pytest-asyncio - pytest-benchmark=4.0 @@ -114,3 +114,4 @@ dependencies: - pytest-kafka==0.6.0 - torch==2.4.0+cu124 name: dev_cuda-125_arch-x86_64 + diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml index 5e9b725d6f..89d67d37ca 100644 --- a/conda/environments/examples_cuda-125_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml @@ -80,3 +80,4 @@ dependencies: - sentence-transformers==2.7 - torch==2.4.0+cu124 name: examples_cuda-125_arch-x86_64 + diff --git a/conda/environments/model-utils_cuda-125_arch-x86_64.yaml b/conda/environments/model-utils_cuda-125_arch-x86_64.yaml index 2957c36473..493c6d5dc6 100644 --- a/conda/environments/model-utils_cuda-125_arch-x86_64.yaml +++ b/conda/environments/model-utils_cuda-125_arch-x86_64.yaml @@ -21,3 +21,4 @@ dependencies: - transformers=4.36.2 - xgboost name: model-utils_cuda-125_arch-x86_64 + diff --git a/conda/environments/runtime_cuda-125_arch-x86_64.yaml b/conda/environments/runtime_cuda-125_arch-x86_64.yaml index 2551739061..93399f21f9 100644 --- a/conda/environments/runtime_cuda-125_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-125_arch-x86_64.yaml @@ -54,3 +54,4 @@ dependencies: - pymilvus==2.3.6 - torch==2.4.0+cu124 name: runtime_cuda-125_arch-x86_64 + From 7b679261e6c5f5d16679fe45ccd551d3602b0b02 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Sat, 19 Oct 2024 12:30:51 -0400 Subject: [PATCH 11/17] clean up env yamls --- conda/environments/all_cuda-125_arch-x86_64.yaml | 3 +-- conda/environments/dev_cuda-125_arch-x86_64.yaml | 3 +-- conda/environments/examples_cuda-125_arch-x86_64.yaml | 3 +-- conda/environments/model-utils_cuda-125_arch-x86_64.yaml | 3 +-- conda/environments/runtime_cuda-125_arch-x86_64.yaml | 3 +-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7afd5778d3..71bf78bde7 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -142,5 +142,4 @@ dependencies: - pytest-kafka==0.6.0 - sentence-transformers==2.7 - torch==2.4.0+cu124 -name: all_cuda-125_arch-x86_64 - +name: all_cuda-125_arch-x86_64 \ No newline at end of file diff --git a/conda/environments/dev_cuda-125_arch-x86_64.yaml b/conda/environments/dev_cuda-125_arch-x86_64.yaml index f18b9e4e68..255881a81d 100644 --- a/conda/environments/dev_cuda-125_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-125_arch-x86_64.yaml @@ -113,5 +113,4 @@ dependencies: - pymilvus==2.3.6 - pytest-kafka==0.6.0 - torch==2.4.0+cu124 -name: dev_cuda-125_arch-x86_64 - +name: dev_cuda-125_arch-x86_64 \ No newline at end of file diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml index 89d67d37ca..6aa877726e 100644 --- a/conda/environments/examples_cuda-125_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml @@ -79,5 +79,4 @@ dependencies: - pymilvus==2.3.6 - sentence-transformers==2.7 - torch==2.4.0+cu124 -name: examples_cuda-125_arch-x86_64 - +name: examples_cuda-125_arch-x86_64 \ No newline at end of file diff --git a/conda/environments/model-utils_cuda-125_arch-x86_64.yaml b/conda/environments/model-utils_cuda-125_arch-x86_64.yaml index 493c6d5dc6..ae605e0cf7 100644 --- a/conda/environments/model-utils_cuda-125_arch-x86_64.yaml +++ b/conda/environments/model-utils_cuda-125_arch-x86_64.yaml @@ -20,5 +20,4 @@ dependencies: - seqeval=1.2.2 - transformers=4.36.2 - xgboost -name: model-utils_cuda-125_arch-x86_64 - +name: model-utils_cuda-125_arch-x86_64 \ No newline at end of file diff --git a/conda/environments/runtime_cuda-125_arch-x86_64.yaml b/conda/environments/runtime_cuda-125_arch-x86_64.yaml index 93399f21f9..304468cd2a 100644 --- a/conda/environments/runtime_cuda-125_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-125_arch-x86_64.yaml @@ -53,5 +53,4 @@ dependencies: - milvus==2.3.5 - pymilvus==2.3.6 - torch==2.4.0+cu124 -name: runtime_cuda-125_arch-x86_64 - +name: runtime_cuda-125_arch-x86_64 \ No newline at end of file From 3fdde2d403a0895ecf50ebb745bc866f8f06ff0d Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Sat, 19 Oct 2024 12:37:11 -0400 Subject: [PATCH 12/17] add newlines back to env yamls --- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/dev_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/examples_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/model-utils_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/runtime_cuda-125_arch-x86_64.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 71bf78bde7..691d717f72 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -142,4 +142,4 @@ dependencies: - pytest-kafka==0.6.0 - sentence-transformers==2.7 - torch==2.4.0+cu124 -name: all_cuda-125_arch-x86_64 \ No newline at end of file +name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/dev_cuda-125_arch-x86_64.yaml b/conda/environments/dev_cuda-125_arch-x86_64.yaml index 255881a81d..f27becb108 100644 --- a/conda/environments/dev_cuda-125_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-125_arch-x86_64.yaml @@ -113,4 +113,4 @@ dependencies: - pymilvus==2.3.6 - pytest-kafka==0.6.0 - torch==2.4.0+cu124 -name: dev_cuda-125_arch-x86_64 \ No newline at end of file +name: dev_cuda-125_arch-x86_64 diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml index 6aa877726e..5e9b725d6f 100644 --- a/conda/environments/examples_cuda-125_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml @@ -79,4 +79,4 @@ dependencies: - pymilvus==2.3.6 - sentence-transformers==2.7 - torch==2.4.0+cu124 -name: examples_cuda-125_arch-x86_64 \ No newline at end of file +name: examples_cuda-125_arch-x86_64 diff --git a/conda/environments/model-utils_cuda-125_arch-x86_64.yaml b/conda/environments/model-utils_cuda-125_arch-x86_64.yaml index ae605e0cf7..2957c36473 100644 --- a/conda/environments/model-utils_cuda-125_arch-x86_64.yaml +++ b/conda/environments/model-utils_cuda-125_arch-x86_64.yaml @@ -20,4 +20,4 @@ dependencies: - seqeval=1.2.2 - transformers=4.36.2 - xgboost -name: model-utils_cuda-125_arch-x86_64 \ No newline at end of file +name: model-utils_cuda-125_arch-x86_64 diff --git a/conda/environments/runtime_cuda-125_arch-x86_64.yaml b/conda/environments/runtime_cuda-125_arch-x86_64.yaml index 304468cd2a..2551739061 100644 --- a/conda/environments/runtime_cuda-125_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-125_arch-x86_64.yaml @@ -53,4 +53,4 @@ dependencies: - milvus==2.3.5 - pymilvus==2.3.6 - torch==2.4.0+cu124 -name: runtime_cuda-125_arch-x86_64 \ No newline at end of file +name: runtime_cuda-125_arch-x86_64 From 7937f441238439cb3d494045a9472c4d00e78b50 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Sat, 19 Oct 2024 14:56:36 -0400 Subject: [PATCH 13/17] use sentence-transformers[onnx-gpu] --- .../all_cuda-125_arch-x86_64.yaml | 2 +- .../examples_cuda-125_arch-x86_64.yaml | 75 ++----------------- dependencies.yaml | 2 +- 3 files changed, 10 insertions(+), 69 deletions(-) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 691d717f72..1546b443b3 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -140,6 +140,6 @@ dependencies: - nemollm==0.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 - - sentence-transformers==2.7 + - sentence-transformers[onnx-gpu]==2.7 - torch==2.4.0+cu124 name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml index 5e9b725d6f..2957c36473 100644 --- a/conda/environments/examples_cuda-125_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml @@ -9,74 +9,15 @@ channels: - nvidia/label/dev - pytorch dependencies: -- anyio>=3.7 -- appdirs -- arxiv=1.4 -- beautifulsoup4=4.12 -- boto3 -- click>=8 -- cudf=24.10 - cuml=24.10.* -- cupy -- datacompy=0.10 -- dill=0.3.7 -- docker-py=5.0 -- elasticsearch==8.9.0 -- feedparser=6.0 -- grpcio -- grpcio-status -- huggingface_hub=0.20.2 -- jsonpatch>=1.33 -- kfp -- libwebp=1.3.2 -- mlflow -- mrc=24.10 -- networkx=2.8.8 -- newspaper3k=0.2 -- nodejs=18.* -- numexpr -- numpydoc=1.5 -- onnx=1.15 -- openai=1.13 -- papermill=2.4.0 -- pip -- pluggy=1.3 -- pydantic -- pynvml=11.4 -- pypdf=3.17.4 -- pypdfium2=4.30 -- python-confluent-kafka>=1.9.2,<1.10.0a0 -- python-docx==1.1.0 -- python-graphviz +- jupyterlab +- matplotlib +- onnx +- pandas - python=3.10 -- rapids-dask-dependency=24.10 -- requests -- requests-cache=1.1 -- requests-toolbelt=1.0 -- s3fs - scikit-learn=1.3.2 -- sqlalchemy<2.0 -- tqdm=4 +- seaborn +- seqeval=1.2.2 - transformers=4.36.2 -- tritonclient=2.34 -- typing_utils=0.1 -- watchdog=3.0 -- websockets -- pip: - - --extra-index-url https://download.pytorch.org/whl/cu124 - - --find-links https://data.dgl.ai/wheels-test/repo.html - - --find-links https://data.dgl.ai/wheels/cu121/repo.html - - databricks-cli < 0.100 - - databricks-connect - - dgl==2.0.0 - - dglgo - - faiss-cpu - - google-search-results==2.4 - - langchain-nvidia-ai-endpoints==0.0.11 - - langchain==0.1.16 - - milvus==2.3.5 - - nemollm==0.3.5 - - pymilvus==2.3.6 - - sentence-transformers==2.7 - - torch==2.4.0+cu124 -name: examples_cuda-125_arch-x86_64 +- xgboost +name: model-utils_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 76518251c5..af1e3da8e8 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -474,7 +474,7 @@ dependencies: - faiss-cpu - google-search-results==2.4 - nemollm==0.3.5 - - sentence-transformers==2.7 + - sentence-transformers[onnx-gpu]==2.7 model-training-tuning: common: From 409741610323d7cc9b0bb93b8ea1c50802a4b366 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Sat, 19 Oct 2024 15:06:51 -0400 Subject: [PATCH 14/17] fix examples env yaml --- .../examples_cuda-125_arch-x86_64.yaml | 75 +++++++++++++++++-- 1 file changed, 67 insertions(+), 8 deletions(-) diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml index 2957c36473..0c59ddf2b8 100644 --- a/conda/environments/examples_cuda-125_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml @@ -9,15 +9,74 @@ channels: - nvidia/label/dev - pytorch dependencies: +- anyio>=3.7 +- appdirs +- arxiv=1.4 +- beautifulsoup4=4.12 +- boto3 +- click>=8 +- cudf=24.10 - cuml=24.10.* -- jupyterlab -- matplotlib -- onnx -- pandas +- cupy +- datacompy=0.10 +- dill=0.3.7 +- docker-py=5.0 +- elasticsearch==8.9.0 +- feedparser=6.0 +- grpcio +- grpcio-status +- huggingface_hub=0.20.2 +- jsonpatch>=1.33 +- kfp +- libwebp=1.3.2 +- mlflow +- mrc=24.10 +- networkx=2.8.8 +- newspaper3k=0.2 +- nodejs=18.* +- numexpr +- numpydoc=1.5 +- onnx=1.15 +- openai=1.13 +- papermill=2.4.0 +- pip +- pluggy=1.3 +- pydantic +- pynvml=11.4 +- pypdf=3.17.4 +- pypdfium2=4.30 +- python-confluent-kafka>=1.9.2,<1.10.0a0 +- python-docx==1.1.0 +- python-graphviz - python=3.10 +- rapids-dask-dependency=24.10 +- requests +- requests-cache=1.1 +- requests-toolbelt=1.0 +- s3fs - scikit-learn=1.3.2 -- seaborn -- seqeval=1.2.2 +- sqlalchemy<2.0 +- tqdm=4 - transformers=4.36.2 -- xgboost -name: model-utils_cuda-125_arch-x86_64 +- tritonclient=2.34 +- typing_utils=0.1 +- watchdog=3.0 +- websockets +- pip: + - --extra-index-url https://download.pytorch.org/whl/cu124 + - --find-links https://data.dgl.ai/wheels-test/repo.html + - --find-links https://data.dgl.ai/wheels/cu121/repo.html + - databricks-cli < 0.100 + - databricks-connect + - dgl==2.0.0 + - dglgo + - faiss-cpu + - google-search-results==2.4 + - langchain-nvidia-ai-endpoints==0.0.11 + - langchain==0.1.16 + - milvus==2.3.5 + - nemollm==0.3.5 + - pymilvus==2.3.6 + - sentence-transformers[onnx-gpu]==2.7 + - torch==2.4.0+cu124 +name: examples_cuda-125_arch-x86_64 From 055fcc2db099064380f04ff0d18116be49dbefd4 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Mon, 21 Oct 2024 08:54:36 -0400 Subject: [PATCH 15/17] revert back to sentence-transformers conda package --- .../all_cuda-125_arch-x86_64.yaml | 2 +- .../examples_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 45 ++++++++++--------- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 1546b443b3..88a0893851 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -108,6 +108,7 @@ dependencies: - s3fs - scikit-build=0.17.6 - scikit-learn=1.3.2 +- sentence-transformers=2.7 - sphinx - sphinx_rtd_theme - sqlalchemy<2.0 @@ -140,6 +141,5 @@ dependencies: - nemollm==0.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 - - sentence-transformers[onnx-gpu]==2.7 - torch==2.4.0+cu124 name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml index 0c59ddf2b8..e387e2c9bf 100644 --- a/conda/environments/examples_cuda-125_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml @@ -55,6 +55,7 @@ dependencies: - requests-toolbelt=1.0 - s3fs - scikit-learn=1.3.2 +- sentence-transformers=2.7 - sqlalchemy<2.0 - tqdm=4 - transformers=4.36.2 @@ -77,6 +78,5 @@ dependencies: - milvus==2.3.5 - nemollm==0.3.5 - pymilvus==2.3.6 - - sentence-transformers[onnx-gpu]==2.7 - torch==2.4.0+cu124 name: examples_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index af1e3da8e8..4bbbd32d73 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -176,24 +176,27 @@ files: - checks - python - # pip dependencies that are used during the conda test stage for morpheus-llm output - morpheus_llm: - output: none - includes: - - test_morpheus_llm_pip - - test_morpheus_core_pip - - # pip dependencies that are used during the conda test stage for morpheus-core output + # morpheus-core pip dependencies are published as a morpheus package data file morpheus_core: - output: none + output: requirements + requirements_dir: python/morpheus/morpheus includes: - - test_morpheus_core_pip + - morpheus_core_pip - # pip dependencies that are used during the conda test stage for morpheus-dfp output + # morpheus-dfp pip dependencies are published as a morpheus_dfp package data file morpheus_dfp: - output: none + output: requirements + requirements_dir: python/morpheus_dfp/morpheus_dfp + includes: + - morpheus_core_pip + + # morpheus-llm pip dependencies are published as a morpheus_llm package data file + morpheus_llm: + output: requirements + requirements_dir: python/morpheus_llm/morpheus_llm includes: - - test_morpheus_core_pip + - morpheus_core_pip + - morpheus_llm_pip channels: - conda-forge @@ -403,24 +406,24 @@ dependencies: - pip: - pytest-kafka==0.6.0 - test_morpheus_core_pip: + morpheus_core_pip: common: - output_types: [requirements] packages: - --extra-index-url https://download.pytorch.org/whl/cu124 - torch==2.4.0+cu124 - test_morpheus_llm_pip: + morpheus_llm_pip: common: - output_types: [requirements] packages: - - milvus==2.3.5 # update to match pymilvus when available - - pymilvus==2.3.6 - - langchain==0.1.16 - - langchain-nvidia-ai-endpoints==0.0.11 - faiss-cpu - google-search-results==2.4 - - nemollm==0.3.5 + - langchain==0.1.16 + - langchain-nvidia-ai-endpoints==0.0.11 + - milvus==2.3.5 # update to match pymilvus when available + - pymilvus==2.3.6 + - nemollm==0.3.5 example-dfp-prod: common: @@ -467,6 +470,7 @@ dependencies: - *pypdfium2 - *python-docx - requests-toolbelt=1.0 # Transitive dep needed by nemollm, specified here to ensure we get a compatible version + - sentence-transformers=2.7 - pip - pip: - langchain==0.1.16 @@ -474,7 +478,6 @@ dependencies: - faiss-cpu - google-search-results==2.4 - nemollm==0.3.5 - - sentence-transformers[onnx-gpu]==2.7 model-training-tuning: common: From 4968c434ad8c9deb9ed820f94332feb07d364c3d Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Wed, 23 Oct 2024 09:53:01 -0400 Subject: [PATCH 16/17] revert environment setup instructions in readme --- tests/benchmarks/README.md | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index 0deab37b43..6259c79e65 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -46,16 +46,37 @@ Once Triton server finishes starting up, it will display the status of all loade +--------------------+---------+--------+ ``` -### Build Morpheus Conda environment +### Set up Morpheus Dev Container -Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-a-conda-environment) to build your Morpheus Conda environment. +If you don't already have the Morpheus Dev container, run the following to build it: +```bash +./docker/build_container_dev.sh +``` -In step 3 of the instructions, choose the command to install all dependencies. +Now run the container: ```bash -conda env create --solver=libmamba -n morpheus --file conda/environments/all_cuda-125_arch-x86_64.yaml -conda activate morpheus +./docker/run_container_dev.sh ``` +Note that Morpheus containers are tagged by date. By default, `run_container_dev.sh` will try to use current date as tag. Therefore, if you are trying to run a container that was not built on the current date, you must set the `DOCKER_IMAGE_TAG` environment variable. For example, +```bash +DOCKER_IMAGE_TAG=dev-221003 ./docker/run_container_dev.sh +``` + +In the `/workspace` directory of the container, run the following to compile Morpheus: +```bash +./scripts/compile.sh +``` + +Now install Morpheus: +```bash +pip install -e /workspace +``` + +Fetch input data for benchmarks: +```bash +./scripts/fetch_data.py fetch validation +``` ### Run E2E Benchmarks From 326fd385442e64bfbfcd787eb84cfa1eceda06c3 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Thu, 31 Oct 2024 17:09:12 -0400 Subject: [PATCH 17/17] roll back dfp benchmarks updates --- .../morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py index 6376349bc7..051e3b7f25 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py @@ -40,7 +40,7 @@ from morpheus.stages.postprocess.serialize_stage import SerializeStage from morpheus.utils.column_info import DataFrameInputSchema from morpheus.utils.file_utils import date_extractor -from morpheus.utils.logger import set_log_level +from morpheus.utils.logger import configure_logging from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage @@ -105,7 +105,7 @@ def dfp_training_pipeline_stages(pipe_config: Config, filenames: typing.List[str], reuse_cache=False): - set_log_level(log_level=logger.level) + configure_logging(log_level=logger.level) pipeline = LinearPipeline(pipe_config) pipeline.set_source(MultiFileSource(pipe_config, filenames=filenames)) @@ -157,7 +157,7 @@ def dfp_inference_pipeline_stages(pipe_config: Config, output_filepath: str, reuse_cache=False): - set_log_level(log_level=logger.level) + configure_logging(log_level=logger.level) pipeline = LinearPipeline(pipe_config) pipeline.set_source(MultiFileSource(pipe_config, filenames=filenames))