From b00e953c1306a101b763e10141facf6059b2436e Mon Sep 17 00:00:00 2001 From: Scott Votaw Date: Wed, 30 Nov 2022 10:55:10 -0800 Subject: [PATCH 1/2] Add docs for passThroughArgs --- .../ml/lightgbm/params/LightGBMParams.scala | 3 +- website/docs/features/lightgbm/about.md | 42 +++++++++++++++++++ .../version-0.10.0/features/lightgbm/about.md | 42 +++++++++++++++++++ .../version-0.10.1/features/lightgbm/about.md | 42 +++++++++++++++++++ .../version-0.10.2/features/lightgbm/about.md | 42 +++++++++++++++++++ 5 files changed, 170 insertions(+), 1 deletion(-) diff --git a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMParams.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMParams.scala index 287cd62247..efa3139b18 100644 --- a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMParams.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMParams.scala @@ -15,7 +15,8 @@ import org.apache.spark.ml.util.DefaultParamsWritable trait LightGBMExecutionParams extends Wrappable { val passThroughArgs = new Param[String](this, "passThroughArgs", "Direct string to pass through to LightGBM library (appended with other explicitly set params). " + - "Will override any parameters given with explicit setters. Can include multiple parameters in one string.") + "Will override any parameters given with explicit setters. Can include multiple parameters in one string. 
" + + "e.g., force_row_wise=true") setDefault(passThroughArgs->"") def getPassThroughArgs: String = $(passThroughArgs) def setPassThroughArgs(value: String): this.type = set(passThroughArgs, value) diff --git a/website/docs/features/lightgbm/about.md b/website/docs/features/lightgbm/about.md index 1cc9150ad0..a251e34074 100644 --- a/website/docs/features/lightgbm/about.md +++ b/website/docs/features/lightgbm/about.md @@ -57,6 +57,48 @@ model = LightGBMRegressor(application='quantile', For an end to end application, check out the LightGBM [notebook example](../LightGBM%20-%20Overview). +### Arguments + +SynapseML exposes getters/setters for many common LightGBM parameters. +In python, you can use the properties as shown above, or in Scala use the +fluent setters. +```scala +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier +val classifier = new LightGBMClassifier() + .setLearningRate(0.2) + .setNumLeaves(50) +``` + +LightGBM has far more parameters than SynapseML exposes. For cases where you +need to set some parameters that SynapseML does not expose a setter for, use +passThroughArgs. This is just a free string that you can use to add extra parameters +to the command SynapseML sends to configure LightGBM. + +```python +from synapse.ml.lightgbm import LightGBMClassifier +model = LightGBMClassifier(passThroughArgs="force_row_wise=true", + numIterations=100, + numLeaves=31).fit(train) +``` + +```scala +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier +val classifier = new LightGBMClassifier() + .setPassThroughArgs("force_row_wise=true min_sum_hessian_in_leaf=2e-3") + .setLearningRate(0.2) + .setNumLeaves(50) +``` + +For formatting options and specific argument documentation, see +[LightGBM docs](https://lightgbm.readthedocs.io/en/v3.3.2/Parameters.html). Some +parameters SynapseML will set specifically for the Spark distributed environment and +should not be changed. 
Some parameters are for cli mode only, and will not work within +Spark. + +Note that you can mix passThroughArgs and explicit args, as shown above. SynapseML will +merge them to create one argument string to send to LightGBM. If you set a parameter in +both places, the passThroughArgs will take precedence. + ### Architecture LightGBM on Spark uses the Simple Wrapper and Interface Generator (SWIG) diff --git a/website/versioned_docs/version-0.10.0/features/lightgbm/about.md b/website/versioned_docs/version-0.10.0/features/lightgbm/about.md index 1cc9150ad0..a251e34074 100644 --- a/website/versioned_docs/version-0.10.0/features/lightgbm/about.md +++ b/website/versioned_docs/version-0.10.0/features/lightgbm/about.md @@ -57,6 +57,48 @@ model = LightGBMRegressor(application='quantile', For an end to end application, check out the LightGBM [notebook example](../LightGBM%20-%20Overview). +### Arguments + +SynapseML exposes getters/setters for many common LightGBM parameters. +In python, you can use the properties as shown above, or in Scala use the +fluent setters. +```scala +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier +val classifier = new LightGBMClassifier() + .setLearningRate(0.2) + .setNumLeaves(50) +``` + +LightGBM has far more parameters than SynapseML exposes. For cases where you +need to set some parameters that SynapseML does not expose a setter for, use +passThroughArgs. This is just a free string that you can use to add extra parameters +to the command SynapseML sends to configure LightGBM. 
+ +```python +from synapse.ml.lightgbm import LightGBMClassifier +model = LightGBMClassifier(passThroughArgs="force_row_wise=true", + numIterations=100, + numLeaves=31).fit(train) +``` + +```scala +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier +val classifier = new LightGBMClassifier() + .setPassThroughArgs("force_row_wise=true min_sum_hessian_in_leaf=2e-3") + .setLearningRate(0.2) + .setNumLeaves(50) +``` + +For formatting options and specific argument documentation, see +[LightGBM docs](https://lightgbm.readthedocs.io/en/v3.3.2/Parameters.html). Some +parameters SynapseML will set specifically for the Spark distributed environment and +should not be changed. Some parameters are for cli mode only, and will not work within +Spark. + +Note that you can mix passThroughArgs and explicit args, as shown above. SynapseML will +merge them to create one argument string to send to LightGBM. If you set a parameter in +both places, the passThroughArgs will take precedence. + ### Architecture LightGBM on Spark uses the Simple Wrapper and Interface Generator (SWIG) diff --git a/website/versioned_docs/version-0.10.1/features/lightgbm/about.md b/website/versioned_docs/version-0.10.1/features/lightgbm/about.md index 1cc9150ad0..a251e34074 100644 --- a/website/versioned_docs/version-0.10.1/features/lightgbm/about.md +++ b/website/versioned_docs/version-0.10.1/features/lightgbm/about.md @@ -57,6 +57,48 @@ model = LightGBMRegressor(application='quantile', For an end to end application, check out the LightGBM [notebook example](../LightGBM%20-%20Overview). +### Arguments + +SynapseML exposes getters/setters for many common LightGBM parameters. +In python, you can use the properties as shown above, or in Scala use the +fluent setters. +```scala +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier +val classifier = new LightGBMClassifier() + .setLearningRate(0.2) + .setNumLeaves(50) +``` + +LightGBM has far more parameters than SynapseML exposes. 
For cases where you +need to set some parameters that SynapseML does not expose a setter for, use +passThroughArgs. This is just a free string that you can use to add extra parameters +to the command SynapseML sends to configure LightGBM. + +```python +from synapse.ml.lightgbm import LightGBMClassifier +model = LightGBMClassifier(passThroughArgs="force_row_wise=true", + numIterations=100, + numLeaves=31).fit(train) +``` + +```scala +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier +val classifier = new LightGBMClassifier() + .setPassThroughArgs("force_row_wise=true min_sum_hessian_in_leaf=2e-3") + .setLearningRate(0.2) + .setNumLeaves(50) +``` + +For formatting options and specific argument documentation, see +[LightGBM docs](https://lightgbm.readthedocs.io/en/v3.3.2/Parameters.html). Some +parameters SynapseML will set specifically for the Spark distributed environment and +should not be changed. Some parameters are for cli mode only, and will not work within +Spark. + +Note that you can mix passThroughArgs and explicit args, as shown above. SynapseML will +merge them to create one argument string to send to LightGBM. If you set a parameter in +both places, the passThroughArgs will take precedence. + ### Architecture LightGBM on Spark uses the Simple Wrapper and Interface Generator (SWIG) diff --git a/website/versioned_docs/version-0.10.2/features/lightgbm/about.md b/website/versioned_docs/version-0.10.2/features/lightgbm/about.md index 1cc9150ad0..a251e34074 100644 --- a/website/versioned_docs/version-0.10.2/features/lightgbm/about.md +++ b/website/versioned_docs/version-0.10.2/features/lightgbm/about.md @@ -57,6 +57,48 @@ model = LightGBMRegressor(application='quantile', For an end to end application, check out the LightGBM [notebook example](../LightGBM%20-%20Overview). +### Arguments + +SynapseML exposes getters/setters for many common LightGBM parameters. 
+In python, you can use the properties as shown above, or in Scala use the +fluent setters. +```scala +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier +val classifier = new LightGBMClassifier() + .setLearningRate(0.2) + .setNumLeaves(50) +``` + +LightGBM has far more parameters than SynapseML exposes. For cases where you +need to set some parameters that SynapseML does not expose a setter for, use +passThroughArgs. This is just a free string that you can use to add extra parameters +to the command SynapseML sends to configure LightGBM. + +```python +from synapse.ml.lightgbm import LightGBMClassifier +model = LightGBMClassifier(passThroughArgs="force_row_wise=true", + numIterations=100, + numLeaves=31).fit(train) +``` + +```scala +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier +val classifier = new LightGBMClassifier() + .setPassThroughArgs("force_row_wise=true min_sum_hessian_in_leaf=2e-3") + .setLearningRate(0.2) + .setNumLeaves(50) +``` + +For formatting options and specific argument documentation, see +[LightGBM docs](https://lightgbm.readthedocs.io/en/v3.3.2/Parameters.html). Some +parameters SynapseML will set specifically for the Spark distributed environment and +should not be changed. Some parameters are for cli mode only, and will not work within +Spark. + +Note that you can mix passThroughArgs and explicit args, as shown above. SynapseML will +merge them to create one argument string to send to LightGBM. If you set a parameter in +both places, the passThroughArgs will take precedence. 
+ ### Architecture LightGBM on Spark uses the Simple Wrapper and Interface Generator (SWIG) From 1918ae60dc7a6cac04dc44bb6d32f559d0e0f800 Mon Sep 17 00:00:00 2001 From: Scott Votaw Date: Wed, 30 Nov 2022 13:28:21 -0800 Subject: [PATCH 2/2] doc edits --- website/docs/features/lightgbm/about.md | 9 ++++++--- .../version-0.10.0/features/lightgbm/about.md | 6 ++++-- .../version-0.10.1/features/lightgbm/about.md | 6 ++++-- .../version-0.10.2/features/lightgbm/about.md | 6 ++++-- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/website/docs/features/lightgbm/about.md b/website/docs/features/lightgbm/about.md index a251e34074..7f1373e9f9 100644 --- a/website/docs/features/lightgbm/about.md +++ b/website/docs/features/lightgbm/about.md @@ -57,11 +57,12 @@ model = LightGBMRegressor(application='quantile', For an end to end application, check out the LightGBM [notebook example](../LightGBM%20-%20Overview). -### Arguments +### Arguments/Parameters SynapseML exposes getters/setters for many common LightGBM parameters. In python, you can use the properties as shown above, or in Scala use the fluent setters. + ```scala import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier val classifier = new LightGBMClassifier() @@ -74,13 +75,15 @@ need to set some parameters that SyanpseML does not expose a setter for, use passThroughArgs. This is just a free string that you can use to add extra parameters to the command SynapseML sends to configure LightGBM. 
+In python: ```python from synapse.ml.lightgbm import LightGBMClassifier -model = LightGBMClassifier(passThroughArgs="force_row_wise=true", +model = LightGBMClassifier(passThroughArgs="force_row_wise=true min_sum_hessian_in_leaf=2e-3", numIterations=100, numLeaves=31).fit(train) ``` +In Scala: ```scala import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier val classifier = new LightGBMClassifier() @@ -93,7 +96,7 @@ For formatting options and specific argument documentation, see [LightGBM docs](https://lightgbm.readthedocs.io/en/v3.3.2/Parameters.html). Some parameters SynapseML will set specifically for the Spark distributed environment and should not be changed. Some parameters are for cli mode only, and will not work within -Spark. +Spark. Note that you can mix passThroughArgs and explicit args, as shown above. SynapseML will merge them to create one argument string to send to LightGBM. If you set a parameter in diff --git a/website/versioned_docs/version-0.10.0/features/lightgbm/about.md b/website/versioned_docs/version-0.10.0/features/lightgbm/about.md index a251e34074..e42d173b51 100644 --- a/website/versioned_docs/version-0.10.0/features/lightgbm/about.md +++ b/website/versioned_docs/version-0.10.0/features/lightgbm/about.md @@ -57,7 +57,7 @@ model = LightGBMRegressor(application='quantile', For an end to end application, check out the LightGBM [notebook example](../LightGBM%20-%20Overview). -### Arguments +### Arguments/Parameters SynapseML exposes getters/setters for many common LightGBM parameters. In python, you can use the properties as shown above, or in Scala use the @@ -74,13 +74,15 @@ need to set some parameters that SyanpseML does not expose a setter for, use passThroughArgs. This is just a free string that you can use to add extra parameters to the command SynapseML sends to configure LightGBM. 
+In python: ```python from synapse.ml.lightgbm import LightGBMClassifier -model = LightGBMClassifier(passThroughArgs="force_row_wise=true", +model = LightGBMClassifier(passThroughArgs="force_row_wise=true min_sum_hessian_in_leaf=2e-3", numIterations=100, numLeaves=31).fit(train) ``` +In Scala: ```scala import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier val classifier = new LightGBMClassifier() diff --git a/website/versioned_docs/version-0.10.1/features/lightgbm/about.md b/website/versioned_docs/version-0.10.1/features/lightgbm/about.md index a251e34074..e42d173b51 100644 --- a/website/versioned_docs/version-0.10.1/features/lightgbm/about.md +++ b/website/versioned_docs/version-0.10.1/features/lightgbm/about.md @@ -57,7 +57,7 @@ model = LightGBMRegressor(application='quantile', For an end to end application, check out the LightGBM [notebook example](../LightGBM%20-%20Overview). -### Arguments +### Arguments/Parameters SynapseML exposes getters/setters for many common LightGBM parameters. In python, you can use the properties as shown above, or in Scala use the @@ -74,13 +74,15 @@ need to set some parameters that SyanpseML does not expose a setter for, use passThroughArgs. This is just a free string that you can use to add extra parameters to the command SynapseML sends to configure LightGBM. 
+In python: ```python from synapse.ml.lightgbm import LightGBMClassifier -model = LightGBMClassifier(passThroughArgs="force_row_wise=true", +model = LightGBMClassifier(passThroughArgs="force_row_wise=true min_sum_hessian_in_leaf=2e-3", numIterations=100, numLeaves=31).fit(train) ``` +In Scala: ```scala import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier val classifier = new LightGBMClassifier() diff --git a/website/versioned_docs/version-0.10.2/features/lightgbm/about.md b/website/versioned_docs/version-0.10.2/features/lightgbm/about.md index a251e34074..e42d173b51 100644 --- a/website/versioned_docs/version-0.10.2/features/lightgbm/about.md +++ b/website/versioned_docs/version-0.10.2/features/lightgbm/about.md @@ -57,7 +57,7 @@ model = LightGBMRegressor(application='quantile', For an end to end application, check out the LightGBM [notebook example](../LightGBM%20-%20Overview). -### Arguments +### Arguments/Parameters SynapseML exposes getters/setters for many common LightGBM parameters. In python, you can use the properties as shown above, or in Scala use the @@ -74,13 +74,15 @@ need to set some parameters that SyanpseML does not expose a setter for, use passThroughArgs. This is just a free string that you can use to add extra parameters to the command SynapseML sends to configure LightGBM. +In python: ```python from synapse.ml.lightgbm import LightGBMClassifier -model = LightGBMClassifier(passThroughArgs="force_row_wise=true", +model = LightGBMClassifier(passThroughArgs="force_row_wise=true min_sum_hessian_in_leaf=2e-3", numIterations=100, numLeaves=31).fit(train) ``` +In Scala: ```scala import com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier val classifier = new LightGBMClassifier()