From 0087ad678059fc132744135dee31f411f80bafd1 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Mon, 27 Nov 2023 11:48:17 -0700 Subject: [PATCH 1/9] Docs update to indicate use of conda-merge to generate install files instead of mamba update --- .../conda/environments/cuda11.8_examples.yml | 5 +++- docs/README.md | 6 +++- .../production/Dockerfile | 5 +++- .../production/morpheus/benchmarks/README.md | 24 ++++++++------- .../gnn_fraud_detection_pipeline/README.md | 6 +++- examples/llm/agents/README.md | 7 ++++- examples/llm/completion/README.md | 28 ++++++++++++----- .../fraud-detection-models/README.md | 10 +++++-- morpheus/llm/services/nemo_llm_service.py | 2 +- morpheus/llm/services/openai_chat_service.py | 5 +++- morpheus/stages/input/arxiv_source.py | 10 +++++-- tests/benchmarks/README.md | 30 +++++++++++-------- tests/examples/llm/common/conftest.py | 7 +++-- tests/llm/conftest.py | 10 +++++-- tests/stages/arxiv/conftest.py | 9 +++--- 15 files changed, 114 insertions(+), 50 deletions(-) diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml index 2e6107e48a..48d3897e50 100644 --- a/docker/conda/environments/cuda11.8_examples.yml +++ b/docker/conda/environments/cuda11.8_examples.yml @@ -18,7 +18,10 @@ # env with this file. 
ex: # mamba env create -n morpheus --file docker/conda/environments/cuda11.8_dev.yml # conda activate morpheus -# mamba env update -n morpheus --file docker/conda/environments/cuda11.8_examples.yml +# mamba install -n base -c conda-forge conda-merge +# conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ +# docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \ +# && mamba env update -n morpheus --file .tmp/merged.yml channels: - rapidsai - nvidia diff --git a/docs/README.md b/docs/README.md index 03235e8c37..77fd468bee 100644 --- a/docs/README.md +++ b/docs/README.md @@ -22,7 +22,11 @@ Additional packages required for building the documentation are defined in `./co ## Install Additional Dependencies From the root of the Morpheus repo: ```bash -mamba env update -f docs/conda_docs.yml +export CUDA_VER=11.8 +mamba install -n base -c conda-forge conda-merge +conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ + docs/conda_docs.yml > .tmp/merged.yml \ + && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml ``` ## Build Morpheus and Documentation diff --git a/examples/digital_fingerprinting/production/Dockerfile b/examples/digital_fingerprinting/production/Dockerfile index 5c65e6e752..61c4dfffeb 100644 --- a/examples/digital_fingerprinting/production/Dockerfile +++ b/examples/digital_fingerprinting/production/Dockerfile @@ -31,7 +31,10 @@ COPY ./conda_env.yml ./ # Install DFP dependencies RUN source activate morpheus \ - && mamba env update -n morpheus -f ./conda_env.yml + && mamba install -n base -c conda-forge conda-merge \ + && conda run -n base --live-stream conda-merge /workspace/docker/conda/environments/cuda11.8_dev.yml \ + ./conda_env.yml > ./merged.yml \ + && mamba env update -n morpheus --file ./merged.yml # Set the tracking URI for mlflow ENV MLFLOW_TRACKING_URI="http://mlflow:5000" diff --git 
a/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md b/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md index 39ad984193..cad1fe96d6 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md @@ -19,38 +19,42 @@ ### Set up Morpheus Dev Container If you don't already have the Morpheus Dev container, run the following to build it: -``` +```bash ./docker/build_container_dev.sh ``` Now run the container: -``` +```bash ./docker/run_container_dev.sh ``` Note that Morpheus containers are tagged by date. By default, `run_container_dev.sh` will try to use current date as tag. Therefore, if you are trying to run a container that was not built on the current date, you must set the `DOCKER_IMAGE_TAG` environment variable. For example, -``` +```bash DOCKER_IMAGE_TAG=dev-221003 ./docker/run_container_dev.sh ``` In the `/workspace` directory of the container, run the following to compile Morpheus: -``` +```bash ./scripts/compile.sh ``` Now install Morpheus: -``` +```bash pip install -e /workspace ``` Install additonal required dependencies: -``` +```bash export CUDA_VER=11.8 -mamba env update -n morpheus --file docker/conda/environments/cuda${CUDA_VER}_examples.yml +mamba install -n base -c conda-forge conda-merge +conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ + docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \ + && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml ``` + Fetch input data for benchmarks: -``` +```bash ./examples/digital_fingerprinting/fetch_example_data.py all ``` @@ -58,12 +62,12 @@ Fetch input data for benchmarks: MLflow is used as the model repository where the trained DFP models will be published and used for inference by the pipelines. 
Run the following to start MLflow in a host terminal window (not container): -``` +```bash # from root of Morpheus repo cd examples/digital_fingerprinting/production ``` -``` +```bash docker compose up mlflow ``` diff --git a/examples/gnn_fraud_detection_pipeline/README.md b/examples/gnn_fraud_detection_pipeline/README.md index f7d2d005dc..291fb75037 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -21,7 +21,11 @@ limitations under the License. Prior to running the GNN fraud detection pipeline, additional requirements must be installed in to your Conda environment. A supplemental requirements file has been provided in this example directory. ```bash -mamba env update -n ${CONDA_DEFAULT_ENV} -f examples/gnn_fraud_detection_pipeline/requirements.yml +export CUDA_VER=11.8 +mamba install -n base -c conda-forge conda-merge +conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ + docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \ + && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml ``` ## Running diff --git a/examples/llm/agents/README.md b/examples/llm/agents/README.md index 08bde8445f..8728091be6 100644 --- a/examples/llm/agents/README.md +++ b/examples/llm/agents/README.md @@ -95,9 +95,14 @@ export SERPAPI_API_KEY="" Install the required dependencies. ```bash -mamba env update -n morpheus --file ${MORPHEUS_ROOT}/docker/conda/environments/cuda11.8_examples.yml +export CUDA_VER=11.8 +mamba install -n base -c conda-forge conda-merge +conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ + docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \ + && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml ``` + ### Running the Morpheus Pipeline The top level entrypoint to each of the LLM example pipelines is `examples/llm/main.py`. 
This script accepts a set diff --git a/examples/llm/completion/README.md b/examples/llm/completion/README.md index 31920cdda8..c3d86fffd2 100644 --- a/examples/llm/completion/README.md +++ b/examples/llm/completion/README.md @@ -32,13 +32,24 @@ limitations under the License. ### Purpose -The primary goal of this example is to showcase the creation of a pipeline that integrates an LLM service with Morpheus. Although this example features a single implementation, the pipeline and its components are versatile and can be adapted to various scenarios with unique requirements. The following highlights different customization points within the pipeline and the specific choices made for this example: +The primary goal of this example is to showcase the creation of a pipeline that integrates an LLM service with Morpheus. +Although this example features a single implementation, the pipeline and its components are versatile and can be adapted +to various scenarios with unique requirements. The following highlights different customization points within the +pipeline and the specific choices made for this example: #### LLM Service -- The pipeline is designed to support any LLM service that adheres to our LLMService interface. Compatible services include OpenAI, NeMo, or even local execution using llama-cpp-python. In this demonstration, we focus on utilizing NeMo as the LLM service, highlighting the advantages it offers over other LLM services and the seamless integration with the NeMo ecosystem. Furthermore, the pipeline can accommodate more complex configurations using NeMo + Inform without necessitating changes to the core pipeline. + +- The pipeline is designed to support any LLM service that adheres to our LLMService interface. Compatible services + include OpenAI, NeMo, or even local execution using llama-cpp-python. 
In this demonstration, we focus on utilizing + NeMo as the LLM service, highlighting the advantages it offers over other LLM services and the seamless integration + with the NeMo ecosystem. Furthermore, the pipeline can accommodate more complex configurations using NeMo + Inform + without necessitating changes to the core pipeline. #### Downstream Tasks -- Post LLM execution, the model's output can be leveraged for various tasks, including model training, analysis, or simulating an attack. In this particular example, we have simplified the implementation and focused solely on the LLMEngine. + +- Post LLM execution, the model's output can be leveraged for various tasks, including model training, analysis, or + simulating an attack. In this particular example, we have simplified the implementation and focused solely on the + LLMEngine. ### Pipeline Implementation @@ -64,9 +75,14 @@ Before running the pipeline, ensure that the `NGC_API_KEY` environment variable Install the required dependencies. ```bash -mamba env update -n morpheus --file ${MORPHEUS_ROOT}/docker/conda/environments/cuda11.8_examples.yml +export CUDA_VER=11.8 +mamba install -n base -c conda-forge conda-merge +conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ + docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \ + && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml ``` + #### Setting up NGC API Key For this example, we utilize the NeMo Service within NGC. To gain access, an NGC API key is required. Follow the @@ -75,7 +91,6 @@ generate your NGC API key. Configure the following environment variables, with NGC_ORG_ID being optional: - ```bash export NGC_API_KEY= export NGC_ORG_ID= @@ -105,7 +120,7 @@ python examples/llm/main.py completion [OPTIONS] COMMAND [ARGS]... - `--pipeline_batch_size INTEGER RANGE` - **Description**: Internal batch size for the pipeline. Can be much larger than the model batch size. 
- Also used for Kafka consumers. + Also used for Kafka consumers. - **Default**: `1024` - `--model_max_batch_size INTEGER RANGE` @@ -123,7 +138,6 @@ python examples/llm/main.py completion [OPTIONS] COMMAND [ARGS]... - `--help` - **Description**: Show the help message with options and commands details. - ### Running Morpheus Pipeline with OpenAI LLM service ```bash diff --git a/models/training-tuning-scripts/fraud-detection-models/README.md b/models/training-tuning-scripts/fraud-detection-models/README.md index 824d473bcf..04f6f8fca1 100644 --- a/models/training-tuning-scripts/fraud-detection-models/README.md +++ b/models/training-tuning-scripts/fraud-detection-models/README.md @@ -23,13 +23,17 @@ limitations under the License. Install packages for training GNN model. -``` -mamba env update -n ${CONDA_DEFAULT_ENV} -f requirements.yml +```bash +export CUDA_VER=11.8 +mamba install -n base -c conda-forge conda-merge +conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ + requirements.yml > .tmp/merged.yml \ + && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml ``` ### Options for training and tuning models. -``` +```bash python training.py --help Usage: training.py [OPTIONS] diff --git a/morpheus/llm/services/nemo_llm_service.py b/morpheus/llm/services/nemo_llm_service.py index c08a41717d..4d901e347b 100644 --- a/morpheus/llm/services/nemo_llm_service.py +++ b/morpheus/llm/services/nemo_llm_service.py @@ -24,7 +24,7 @@ IMPORT_ERROR_MESSAGE = ( "NemoLLM not found. 
Install it and other additional dependencies by running the following command:\n" - "`mamba env update -n ${CONDA_DEFAULT_ENV} --file docker/conda/environments/cuda11.8_examples.yml`") + nvcr.io/nvidia/morpheus/morpheus:v23.11.00a-runtime try: import nemollm diff --git a/morpheus/llm/services/openai_chat_service.py b/morpheus/llm/services/openai_chat_service.py index dda00d032d..b3924d8bbb 100644 --- a/morpheus/llm/services/openai_chat_service.py +++ b/morpheus/llm/services/openai_chat_service.py @@ -25,7 +25,10 @@ IMPORT_ERROR_MESSAGE = ( "OpenAIChatService & OpenAIChatClient require the openai package to be installed. " "Install it by running the following command:\n" - "`mamba env update -n ${CONDA_DEFAULT_ENV} --file docker/conda/environments/cuda11.8_examples.yml`") + "`mamba install -n base -c conda-forge conda-merge`\n" + "`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml " + " docker/conda/environments/cuda${CUDA_VER}_examples.yml" + " > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`") try: import openai diff --git a/morpheus/stages/input/arxiv_source.py b/morpheus/stages/input/arxiv_source.py index 78db6d5ba7..7762b373f9 100644 --- a/morpheus/stages/input/arxiv_source.py +++ b/morpheus/stages/input/arxiv_source.py @@ -36,7 +36,10 @@ IMPORT_ERROR_MESSAGE = ( "ArxivSource requires additional dependencies to be installed. 
Install them by running the following command: " - "`mamba env update -n ${CONDA_DEFAULT_ENV} --file docker/conda/environments/cuda11.8_examples.yml`") + "`mamba install -n base -c conda-forge conda-merge`\n" + "`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml " + " docker/conda/environments/cuda${CUDA_VER}_examples.yml" + " > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`") @register_stage("from-arxiv") @@ -45,7 +48,10 @@ class ArxivSource(PreallocatorMixin, SingleOutputSource): Source stage that downloads PDFs from arxiv and converts them to dataframes. This stage requires several additional dependencies to be installed. Install them by running the following command: - `mamba env update -n ${CONDA_DEFAULT_ENV} --file docker/conda/environments/cuda11.8_examples.yml` + mamba install -n base -c conda-forge conda-merge + conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml + docker/conda/environments/cuda${CUDA_VER}_examples.yml + > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml Parameters ---------- diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index 25cf9f2a19..da0553c287 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -23,12 +23,12 @@ Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tri Example: -``` +```bash docker pull nvcr.io/nvidia/tritonserver:23.06-py3 ``` ##### Start Triton Inference Server container -``` +```bash cd ${MORPHEUS_ROOT}/models docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model sid-minibert-onnx --load-model abp-nvsmi-xgb --load-model phishing-bert-onnx @@ -50,32 +50,32 @@ Once Triton server finishes starting up, it will display the status of all loade ### Set up Morpheus 
Dev Container If you don't already have the Morpheus Dev container, run the following to build it: -``` +```bash ./docker/build_container_dev.sh ``` Now run the container: -``` +```bash ./docker/run_container_dev.sh ``` Note that Morpheus containers are tagged by date. By default, `run_container_dev.sh` will try to use current date as tag. Therefore, if you are trying to run a container that was not built on the current date, you must set the `DOCKER_IMAGE_TAG` environment variable. For example, -``` +```bash DOCKER_IMAGE_TAG=dev-221003 ./docker/run_container_dev.sh ``` In the `/workspace` directory of the container, run the following to compile Morpheus: -``` +```bash ./scripts/compile.sh ``` Now install Morpheus: -``` +```bash pip install -e /workspace ``` Fetch input data for benchmarks: -``` +```bash ./scripts/fetch_data.py fetch validation ``` @@ -102,7 +102,7 @@ Morpheus configurations for each workflow are managed using `e2e_test_configs.js Benchmarks for an individual workflow can be run using the following: -``` +```bash cd tests/benchmarks pytest -s --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave test_bench_e2e_pipelines.py:: @@ -118,12 +118,12 @@ The `--benchmark-warmup` and `--benchmark-warmup-iterations` options are used to - `test_cloudtrail_ae_e2e` For example, to run E2E benchmarks on the SID NLP workflow: -``` +```bash pytest -s --run_benchmark --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave test_bench_e2e_pipelines.py::test_sid_nlp_e2e ``` To run E2E benchmarks on all workflows: -``` +```bash pytest -s --run_benchmark --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave test_bench_e2e_pipelines.py ``` @@ -188,7 +188,11 @@ Note that the `test_cloudtrail_ae_e2e` benchmarks measure performance of a pipel You can use the same Dev container created here to run the Production DFP benchmarks. 
You would just need to install additional dependencies as follows: -``` +```bash export CUDA_VER=11.8 -mamba env update -n morpheus --file docker/conda/environments/cuda${CUDA_VER}_examples.yml +mamba install -n base -c conda-forge conda-merge +conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ + docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \ + && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml + ``` \ No newline at end of file diff --git a/tests/examples/llm/common/conftest.py b/tests/examples/llm/common/conftest.py index be07e47af6..11ef4bad0c 100644 --- a/tests/examples/llm/common/conftest.py +++ b/tests/examples/llm/common/conftest.py @@ -21,8 +21,11 @@ @pytest.fixture(name="nemollm", autouse=True, scope='session') def nemollm_fixture(fail_missing: bool): """ - All of the tests in this subdir require nemollm + All the tests in this subdir require nemollm """ skip_reason = ("Tests for the WebScraperStage require the langchain package to be installed, to install this run:\n" - "`mamba env update -n ${CONDA_DEFAULT_ENV} --file docker/conda/environments/cuda11.8_examples.yml`") + "`mamba install -n base -c conda-forge conda-merge`\n" + "`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml " + " docker/conda/environments/cuda${CUDA_VER}_examples.yml" + " > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`") yield import_or_skip("langchain", reason=skip_reason, fail_missing=fail_missing) diff --git a/tests/llm/conftest.py b/tests/llm/conftest.py index 060032a886..105fa9162c 100644 --- a/tests/llm/conftest.py +++ b/tests/llm/conftest.py @@ -27,7 +27,10 @@ def nemollm_fixture(fail_missing: bool): Fixture to ensure nemollm is installed """ skip_reason = ("Tests for the NeMoLLMService require the nemollm package to be installed, to install this run:\n" - "`mamba env update -n ${CONDA_DEFAULT_ENV} --file 
docker/conda/environments/cuda11.8_examples.yml`") + "`mamba install -n base -c conda-forge conda-merge`\n" + "`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml " + " docker/conda/environments/cuda${CUDA_VER}_examples.yml" + " > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`") yield import_or_skip("nemollm", reason=skip_reason, fail_missing=fail_missing) @@ -37,7 +40,10 @@ def openai_fixture(fail_missing: bool): Fixture to ensure openai is installed """ skip_reason = ("Tests for the OpenAIChatService require the openai package to be installed, to install this run:\n" - "`mamba env update -n ${CONDA_DEFAULT_ENV} --file docker/conda/environments/cuda11.8_examples.yml`") + "`mamba install -n base -c conda-forge conda-merge`\n" + "`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml " + " docker/conda/environments/cuda${CUDA_VER}_examples.yml" + " > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`") yield import_or_skip("openai", reason=skip_reason, fail_missing=fail_missing) diff --git a/tests/stages/arxiv/conftest.py b/tests/stages/arxiv/conftest.py index b304e64159..0c3e82a7f1 100644 --- a/tests/stages/arxiv/conftest.py +++ b/tests/stages/arxiv/conftest.py @@ -22,13 +22,14 @@ SKIP_REASON = ("Tests for the arxiv_source require a number of packages not installed in the Morpheus development " "environment. 
To install these run:\n" - "`mamba env update -n ${CONDA_DEFAULT_ENV} --file docker/conda/environments/cuda11.8_examples.yml`") - - + "`mamba install -n base -c conda-forge conda-merge`\n" + "`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml " + " docker/conda/environments/cuda${CUDA_VER}_examples.yml" + " > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`") @pytest.fixture(name="arxiv", autouse=True, scope='session') def arxiv_fixture(fail_missing: bool): """ - All of the tests in this subdir require arxiv + All the tests in this subdir require arxiv """ yield import_or_skip("arxiv", reason=SKIP_REASON, fail_missing=fail_missing) From f187534d644d2764f55f81db4a1a5f548207196e Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Mon, 27 Nov 2023 11:55:25 -0700 Subject: [PATCH 2/9] Typo fixes --- examples/gnn_fraud_detection_pipeline/README.md | 2 +- examples/llm/completion/README.md | 15 +++------------ morpheus/llm/services/nemo_llm_service.py | 5 ++++- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/examples/gnn_fraud_detection_pipeline/README.md b/examples/gnn_fraud_detection_pipeline/README.md index 291fb75037..8d7aae086a 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -24,7 +24,7 @@ Prior to running the GNN fraud detection pipeline, additional requirements must export CUDA_VER=11.8 mamba install -n base -c conda-forge conda-merge conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ - docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \ + examples/gnn_fraud_detection_pipeline/requirements.yml > .tmp/merged.yml \ && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml ``` diff --git a/examples/llm/completion/README.md b/examples/llm/completion/README.md index c3d86fffd2..50678b25c4 100644 --- a/examples/llm/completion/README.md +++ 
b/examples/llm/completion/README.md @@ -32,24 +32,15 @@ limitations under the License. ### Purpose -The primary goal of this example is to showcase the creation of a pipeline that integrates an LLM service with Morpheus. -Although this example features a single implementation, the pipeline and its components are versatile and can be adapted -to various scenarios with unique requirements. The following highlights different customization points within the -pipeline and the specific choices made for this example: +The primary goal of this example is to showcase the creation of a pipeline that integrates an LLM service with Morpheus. Although this example features a single implementation, the pipeline and its components are versatile and can be adapted to various scenarios with unique requirements. The following highlights different customization points within the pipeline and the specific choices made for this example: #### LLM Service -- The pipeline is designed to support any LLM service that adheres to our LLMService interface. Compatible services - include OpenAI, NeMo, or even local execution using llama-cpp-python. In this demonstration, we focus on utilizing - NeMo as the LLM service, highlighting the advantages it offers over other LLM services and the seamless integration - with the NeMo ecosystem. Furthermore, the pipeline can accommodate more complex configurations using NeMo + Inform - without necessitating changes to the core pipeline. +- The pipeline is designed to support any LLM service that adheres to our LLMService interface. Compatible services include OpenAI, NeMo, or even local execution using llama-cpp-python. In this demonstration, we focus on utilizing NeMo as the LLM service, highlighting the advantages it offers over other LLM services and the seamless integration with the NeMo ecosystem. Furthermore, the pipeline can accommodate more complex configurations using NeMo + Inform without necessitating changes to the core pipeline. 
#### Downstream Tasks -- Post LLM execution, the model's output can be leveraged for various tasks, including model training, analysis, or - simulating an attack. In this particular example, we have simplified the implementation and focused solely on the - LLMEngine. +- Post LLM execution, the model's output can be leveraged for various tasks, including model training, analysis, or simulating an attack. In this particular example, we have simplified the implementation and focused solely on the LLMEngine. ### Pipeline Implementation diff --git a/morpheus/llm/services/nemo_llm_service.py b/morpheus/llm/services/nemo_llm_service.py index 4d901e347b..17deace5f6 100644 --- a/morpheus/llm/services/nemo_llm_service.py +++ b/morpheus/llm/services/nemo_llm_service.py @@ -24,7 +24,10 @@ IMPORT_ERROR_MESSAGE = ( "NemoLLM not found. Install it and other additional dependencies by running the following command:\n" - nvcr.io/nvidia/morpheus/morpheus:v23.11.00a-runtime + "`mamba install -n base -c conda-forge conda-merge`\n" + "`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml " + " docker/conda/environments/cuda${CUDA_VER}_examples.yml" + " > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`") try: import nemollm From 22987d071543488aa2fe66a74fa3c7038989502a Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Mon, 27 Nov 2023 12:28:20 -0700 Subject: [PATCH 3/9] Formatting fix --- tests/stages/arxiv/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/stages/arxiv/conftest.py b/tests/stages/arxiv/conftest.py index 0c3e82a7f1..52ac0747e6 100644 --- a/tests/stages/arxiv/conftest.py +++ b/tests/stages/arxiv/conftest.py @@ -26,6 +26,8 @@ "`conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml " " docker/conda/environments/cuda${CUDA_VER}_examples.yml" " > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml`") + + @pytest.fixture(name="arxiv", 
autouse=True, scope='session') def arxiv_fixture(fail_missing: bool): """ From e2c693c2cb39a791235e8014d22643ce7194918b Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Mon, 27 Nov 2023 15:24:29 -0700 Subject: [PATCH 4/9] Add default channel to environment ymls so they work with condarc specifications in dev containers --- docker/conda/environments/cuda11.8_dev.yml | 1 + docker/conda/environments/cuda11.8_examples.yml | 1 + docker/conda/environments/cuda11.8_runtime.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index 8235cdc6c1..29e06bf161 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -21,6 +21,7 @@ channels: - nvidia/label/dev # For pre-releases of MRC. Should still default to full releases if available - pytorch - conda-forge + - default dependencies: ####### Morpheus Dependencies (keep sorted!) ####### - automake=1.16.5 diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml index 48d3897e50..53b6cdc4db 100644 --- a/docker/conda/environments/cuda11.8_examples.yml +++ b/docker/conda/environments/cuda11.8_examples.yml @@ -28,6 +28,7 @@ channels: - huggingface - conda-forge - dglteam/label/cu118 + - default dependencies: - arxiv=1.4 - boto3 diff --git a/docker/conda/environments/cuda11.8_runtime.yml b/docker/conda/environments/cuda11.8_runtime.yml index 4ff10a6837..60bb2ab3b1 100644 --- a/docker/conda/environments/cuda11.8_runtime.yml +++ b/docker/conda/environments/cuda11.8_runtime.yml @@ -19,6 +19,7 @@ channels: - nvidia - rapidsai-nightly - conda-forge + - default dependencies: - nb_conda_kernels - pip From 9f9ede11f73198da8b7559747f1924abce0a5f30 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Mon, 27 Nov 2023 23:29:44 -0700 Subject: [PATCH 5/9] PR feedback updates --- ci/conda/recipes/morpheus/meta.yaml | 2 +- 
docker/conda/environments/cuda11.8_examples.yml | 2 -- morpheus/llm/nodes/extracter_node.py | 2 +- morpheus/stages/input/arxiv_source.py | 4 ++-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/ci/conda/recipes/morpheus/meta.yaml b/ci/conda/recipes/morpheus/meta.yaml index 5bd1da8b98..c1644da3b2 100644 --- a/ci/conda/recipes/morpheus/meta.yaml +++ b/ci/conda/recipes/morpheus/meta.yaml @@ -82,7 +82,7 @@ outputs: - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 - mlflow>=2.2.1,<3 - mrc - - networkx 3.1.* + - networkx>=2.8 - numpydoc 1.4.* - nvtabular {{ rapids_version }}.* - pandas 1.3.* diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml index 53b6cdc4db..822a0d6813 100644 --- a/docker/conda/environments/cuda11.8_examples.yml +++ b/docker/conda/environments/cuda11.8_examples.yml @@ -16,8 +16,6 @@ # Additional dependencies needed by a some of the Morpheus examples. # The intended usage is to first create the conda environment from the `cuda11.8_dev.yml` file, and then update the # env with this file. 
ex: -# mamba env create -n morpheus --file docker/conda/environments/cuda11.8_dev.yml -# conda activate morpheus # mamba install -n base -c conda-forge conda-merge # conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ # docker/conda/environments/cuda${CUDA_VER}_examples.yml > .tmp/merged.yml \ diff --git a/morpheus/llm/nodes/extracter_node.py b/morpheus/llm/nodes/extracter_node.py index cb5baf6179..374753bcc4 100644 --- a/morpheus/llm/nodes/extracter_node.py +++ b/morpheus/llm/nodes/extracter_node.py @@ -30,7 +30,7 @@ class ExtracterNode(LLMNodeBase): """ def get_input_names(self) -> list[str]: - # This node does not receive it's inputs from upstream nodes, but rather from the task itself + # This node does not receive its inputs from upstream nodes, but rather from the task itself return [] async def execute(self, context: LLMContext) -> LLMContext: diff --git a/morpheus/stages/input/arxiv_source.py b/morpheus/stages/input/arxiv_source.py index 7762b373f9..c85720a706 100644 --- a/morpheus/stages/input/arxiv_source.py +++ b/morpheus/stages/input/arxiv_source.py @@ -50,8 +50,8 @@ class ArxivSource(PreallocatorMixin, SingleOutputSource): This stage requires several additional dependencies to be installed. 
Install them by running the following command: mamba install -n base -c conda-forge conda-merge conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml - docker/conda/environments/cuda${CUDA_VER}_examples.yml - > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml + docker/conda/environments/cuda${CUDA_VER}_examples.yml + > .tmp/merged.yml && mamba env update -n morpheus --file .tmp/merged.yml Parameters ---------- From a9e5b45e5e930b82f5c238e85c1bbfd3d7d53fa2 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Tue, 28 Nov 2023 11:23:00 -0700 Subject: [PATCH 6/9] Fix default -> defaults problem --- docker/conda/environments/cuda11.8_dev.yml | 2 +- docker/conda/environments/cuda11.8_examples.yml | 2 +- docker/conda/environments/cuda11.8_runtime.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index 29e06bf161..7c1c45f3e1 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -21,7 +21,7 @@ channels: - nvidia/label/dev # For pre-releases of MRC. Should still default to full releases if available - pytorch - conda-forge - - default + - defaults dependencies: ####### Morpheus Dependencies (keep sorted!) 
####### - automake=1.16.5 diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml index 822a0d6813..1d49130bcc 100644 --- a/docker/conda/environments/cuda11.8_examples.yml +++ b/docker/conda/environments/cuda11.8_examples.yml @@ -26,7 +26,7 @@ channels: - huggingface - conda-forge - dglteam/label/cu118 - - default + - defaults dependencies: - arxiv=1.4 - boto3 diff --git a/docker/conda/environments/cuda11.8_runtime.yml b/docker/conda/environments/cuda11.8_runtime.yml index 60bb2ab3b1..b8cbfc29ef 100644 --- a/docker/conda/environments/cuda11.8_runtime.yml +++ b/docker/conda/environments/cuda11.8_runtime.yml @@ -19,7 +19,7 @@ channels: - nvidia - rapidsai-nightly - conda-forge - - default + - defaults dependencies: - nb_conda_kernels - pip From a3570d053da5490fb8706e928474b18a5c691910 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Tue, 28 Nov 2023 15:18:32 -0700 Subject: [PATCH 7/9] Update all requirements scripts to include defaults -- avoids container install issues --- examples/gnn_fraud_detection_pipeline/requirements.yml | 1 + examples/llm/agents/requirements.yaml | 1 + examples/llm/completion/requirements.yaml | 1 + examples/llm/rag/requirements.yaml | 1 + examples/llm/vdb_upload/requirements.yaml | 1 + models/training-tuning-scripts/abp-models/requirements.yml | 1 + models/training-tuning-scripts/dfp-models/requirements.yml | 1 + .../fraud-detection-models/requirements.yml | 1 + .../training-tuning-scripts/log-parsing-models/requirements.yml | 1 + models/training-tuning-scripts/phishing-models/requirements.yml | 1 + .../training-tuning-scripts/ransomware-models/requirements.yml | 1 + .../training-tuning-scripts/root-cause-models/requirements.yml | 1 + models/training-tuning-scripts/sid-models/requirements.yml | 1 + .../fraud-detection-models/requirements.yml | 1 + 14 files changed, 14 insertions(+) diff --git a/examples/gnn_fraud_detection_pipeline/requirements.yml 
b/examples/gnn_fraud_detection_pipeline/requirements.yml index 01f641c047..e0f37be2cd 100644 --- a/examples/gnn_fraud_detection_pipeline/requirements.yml +++ b/examples/gnn_fraud_detection_pipeline/requirements.yml @@ -18,6 +18,7 @@ channels: - nvidia - conda-forge - dglteam/label/cu118 + - defaults dependencies: - cuml=23.06 - dgl=1.0.2 diff --git a/examples/llm/agents/requirements.yaml b/examples/llm/agents/requirements.yaml index 5be71458cb..fe18ac788e 100644 --- a/examples/llm/agents/requirements.yaml +++ b/examples/llm/agents/requirements.yaml @@ -16,6 +16,7 @@ channels: - huggingface - conda-forge + - defaults dependencies: - langchain=0.0.190 - pip diff --git a/examples/llm/completion/requirements.yaml b/examples/llm/completion/requirements.yaml index 2929c9967b..797989feda 100644 --- a/examples/llm/completion/requirements.yaml +++ b/examples/llm/completion/requirements.yaml @@ -15,6 +15,7 @@ channels: - conda-forge + - defaults dependencies: - arxiv=1.4 - langchain=0.0.190 diff --git a/examples/llm/rag/requirements.yaml b/examples/llm/rag/requirements.yaml index d8579b8065..c43b5ebd84 100644 --- a/examples/llm/rag/requirements.yaml +++ b/examples/llm/rag/requirements.yaml @@ -16,6 +16,7 @@ channels: - huggingface - conda-forge + - defaults dependencies: - pip - openai=0.28 diff --git a/examples/llm/vdb_upload/requirements.yaml b/examples/llm/vdb_upload/requirements.yaml index e9786a69ce..68c4335c7f 100644 --- a/examples/llm/vdb_upload/requirements.yaml +++ b/examples/llm/vdb_upload/requirements.yaml @@ -15,6 +15,7 @@ channels: - conda-forge + - defaults dependencies: - arxiv=1.4 - onnx # required for triton model export diff --git a/models/training-tuning-scripts/abp-models/requirements.yml b/models/training-tuning-scripts/abp-models/requirements.yml index 93b5b4c00d..8bc65d6f54 100644 --- a/models/training-tuning-scripts/abp-models/requirements.yml +++ b/models/training-tuning-scripts/abp-models/requirements.yml @@ -17,6 +17,7 @@ channels: - rapidsai - 
nvidia - conda-forge + - defaults dependencies: - cuml=23.06 - jupyterlab diff --git a/models/training-tuning-scripts/dfp-models/requirements.yml b/models/training-tuning-scripts/dfp-models/requirements.yml index 902499da60..c7f5423ae8 100644 --- a/models/training-tuning-scripts/dfp-models/requirements.yml +++ b/models/training-tuning-scripts/dfp-models/requirements.yml @@ -17,6 +17,7 @@ channels: - nvidia - pytorch - conda-forge + - defaults dependencies: - dill - jupyterlab diff --git a/models/training-tuning-scripts/fraud-detection-models/requirements.yml b/models/training-tuning-scripts/fraud-detection-models/requirements.yml index 11df049834..31cce4e464 100644 --- a/models/training-tuning-scripts/fraud-detection-models/requirements.yml +++ b/models/training-tuning-scripts/fraud-detection-models/requirements.yml @@ -19,6 +19,7 @@ channels: - dglteam/label/cu118 - pytorch - conda-forge + - defaults dependencies: - click>=8 - cuml=23.06 diff --git a/models/training-tuning-scripts/log-parsing-models/requirements.yml b/models/training-tuning-scripts/log-parsing-models/requirements.yml index 1132ddb083..761443a742 100644 --- a/models/training-tuning-scripts/log-parsing-models/requirements.yml +++ b/models/training-tuning-scripts/log-parsing-models/requirements.yml @@ -18,6 +18,7 @@ channels: - nvidia - pytorch - conda-forge + - defaults dependencies: - cudf=23.06 - jupyterlab diff --git a/models/training-tuning-scripts/phishing-models/requirements.yml b/models/training-tuning-scripts/phishing-models/requirements.yml index d87c9ccc88..8d5321283a 100644 --- a/models/training-tuning-scripts/phishing-models/requirements.yml +++ b/models/training-tuning-scripts/phishing-models/requirements.yml @@ -18,6 +18,7 @@ channels: - nvidia - pytorch - conda-forge + - defaults dependencies: - cudf=23.06 - jupyterlab diff --git a/models/training-tuning-scripts/ransomware-models/requirements.yml b/models/training-tuning-scripts/ransomware-models/requirements.yml index 
f6495ecf08..5524db13e7 100644 --- a/models/training-tuning-scripts/ransomware-models/requirements.yml +++ b/models/training-tuning-scripts/ransomware-models/requirements.yml @@ -15,6 +15,7 @@ channels: - conda-forge + - defaults dependencies: - jupyterlab - matplotlib diff --git a/models/training-tuning-scripts/root-cause-models/requirements.yml b/models/training-tuning-scripts/root-cause-models/requirements.yml index d87c9ccc88..8d5321283a 100644 --- a/models/training-tuning-scripts/root-cause-models/requirements.yml +++ b/models/training-tuning-scripts/root-cause-models/requirements.yml @@ -18,6 +18,7 @@ channels: - nvidia - pytorch - conda-forge + - defaults dependencies: - cudf=23.06 - jupyterlab diff --git a/models/training-tuning-scripts/sid-models/requirements.yml b/models/training-tuning-scripts/sid-models/requirements.yml index d87c9ccc88..8d5321283a 100644 --- a/models/training-tuning-scripts/sid-models/requirements.yml +++ b/models/training-tuning-scripts/sid-models/requirements.yml @@ -18,6 +18,7 @@ channels: - nvidia - pytorch - conda-forge + - defaults dependencies: - cudf=23.06 - jupyterlab diff --git a/models/validation-inference-scripts/fraud-detection-models/requirements.yml b/models/validation-inference-scripts/fraud-detection-models/requirements.yml index 7fe973ff1d..c002d618a5 100644 --- a/models/validation-inference-scripts/fraud-detection-models/requirements.yml +++ b/models/validation-inference-scripts/fraud-detection-models/requirements.yml @@ -17,6 +17,7 @@ channels: - rapidsai - nvidia - conda-forge + - defaults dependencies: - click==8.1.3 - cuml=23.06 From 9bd64c929204da855a1ec3941e05de817f1013a4 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Tue, 28 Nov 2023 15:21:18 -0700 Subject: [PATCH 8/9] Fix fraud detection merge path --- models/training-tuning-scripts/fraud-detection-models/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/training-tuning-scripts/fraud-detection-models/README.md 
b/models/training-tuning-scripts/fraud-detection-models/README.md index 04f6f8fca1..8039c07f0c 100644 --- a/models/training-tuning-scripts/fraud-detection-models/README.md +++ b/models/training-tuning-scripts/fraud-detection-models/README.md @@ -27,7 +27,7 @@ Install packages for training GNN model. export CUDA_VER=11.8 mamba install -n base -c conda-forge conda-merge conda run -n base --live-stream conda-merge docker/conda/environments/cuda${CUDA_VER}_dev.yml \ - requirements.yml > .tmp/merged.yml \ + models/training-tuning-scripts/fraud-detection-models/requirements.yml > .tmp/merged.yml \ && mamba env update -n ${CONDA_DEFAULT_ENV} --file .tmp/merged.yml ``` From 45cecb4749a8dcb95591f50b95965d56a5606886 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Wed, 29 Nov 2023 11:09:27 -0700 Subject: [PATCH 9/9] Remove matplotlib dependency for fraud-detection-models, it appears incompatible with libwebp 1.3.2, and was not being used --- .../fraud-detection-models/gnn-fraud-detection-training.ipynb | 3 +-- .../fraud-detection-models/requirements.yml | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb b/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb index d12f4c6bd3..d66234974d 100644 --- a/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb +++ b/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb @@ -52,7 +52,6 @@ "%autoreload 2\n", "import pandas as pd\n", "import numpy as np\n", - "import matplotlib.pylab as plt\n", "import os\n", "import dgl\n", "import numpy as np\n", @@ -1011,7 +1010,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/models/training-tuning-scripts/fraud-detection-models/requirements.yml 
b/models/training-tuning-scripts/fraud-detection-models/requirements.yml index 31cce4e464..5714a666e7 100644 --- a/models/training-tuning-scripts/fraud-detection-models/requirements.yml +++ b/models/training-tuning-scripts/fraud-detection-models/requirements.yml @@ -25,7 +25,6 @@ dependencies: - cuml=23.06 - dgl - jupyterlab - - matplotlib - pytorch-cuda=11.8 - pytorch=2.0.1 - scikit-learn=1.2.2