Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add back diagnosticsInfo for MVAD #1892

Merged
merged 8 commits into from
Apr 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,19 @@ publishDotnetTestBase := {
publishDotnetAssemblyCmd(packagePath, genSleetConfig.value)
}

// This command should be run only when you make an update to DotnetBase proj, and it will override
// existing nuget package with the same version number
val publishDotnetBase = TaskKey[Unit]("publishDotnetBase",
"publish dotnet base nuget package that contains core elements for SynapseML in C#")
publishDotnetBase := {
val dotnetBaseDir = join(baseDirectory.value, "core", "src", "main", "dotnet", "src")
packDotnetAssemblyCmd(join(dotnetBaseDir, "target").getAbsolutePath, dotnetBaseDir)
val packagePath = join(dotnetBaseDir,
// Update the version whenever there's a new release
"target", s"SynapseML.DotnetBase.${dotnetedVersion("0.11.0")}.nupkg").getAbsolutePath
publishDotnetAssemblyCmd(packagePath, genSleetConfig.value)
}

def runTaskForAllInCompile(task: TaskKey[Unit]): Def.Initialize[Task[Seq[Unit]]] = {
task.all(ScopeFilter(
inProjects(core, deepLearning, cognitive, vw, lightgbm, opencv),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import com.microsoft.azure.synapse.ml.io.http.HandlingUtils.{convertAndClose, se
import com.microsoft.azure.synapse.ml.io.http.RESTHelpers.{Client, retry}
import com.microsoft.azure.synapse.ml.io.http._
import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging
import com.microsoft.azure.synapse.ml.param.CognitiveServiceStructParam
import org.apache.commons.io.IOUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.http.client.methods._
Expand Down Expand Up @@ -315,7 +316,7 @@ trait MADBase extends HasOutputCol
.sparkContext.hadoopConfiguration
val key = Option(hc.get(adlsConfig)).orElse(Option(hc.get(blobConfig)))

if (key.isEmpty){
if (key.isEmpty) {
throw new IllegalAccessError("Could not find the storage account credentials." +
s" Make sure your hadoopConfiguration has the" +
s" ''$blobConfig'' or ''$adlsConfig'' configuration set.")
Expand Down Expand Up @@ -495,6 +496,7 @@ class SimpleFitMultivariateAnomaly(override val uid: String) extends Estimator[S
.setLocation(getUrl.split("/".toCharArray)(2).split(".".toCharArray).head)
.setModelId(modelId)
.setIntermediateSaveDir(getIntermediateSaveDir)
.setDiagnosticsInfo(modelInfo("diagnosticsInfo").convertTo[DiagnosticsInfo])
})
}

Expand Down Expand Up @@ -524,6 +526,13 @@ class SimpleDetectMultivariateAnomaly(override val uid: String) extends Model[Si

def getModelId: String = $(modelId)

val diagnosticsInfo = new CognitiveServiceStructParam[DiagnosticsInfo](this, "diagnosticsInfo",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this CognitiveServicesStruct param? Does this provide a general way of letting the python side create wrappers too? If so thats pretty awesome

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I initially added this type of param to support extending JsonEncodableParam for cog service params, and used it for TextAnalyzeTask parameter, but looks like it's been removed and updated somehow.
This is kind of a structured param that is not service param, because it doesn't support Left/Right. And you're right, I should figure out how the python wrapper could access this value, I think right now it can't really access the value inside :(
I'll figure that out and add in this PR

"diagnosticsInfo for training a multivariate anomaly detection model")

def setDiagnosticsInfo(v: DiagnosticsInfo): this.type = set(diagnosticsInfo, v)

def getDiagnosticsInfo: DiagnosticsInfo = $(diagnosticsInfo)

val topContributorCount = new IntParam(this, "topContributorCount", "This is a number" +
" that you could specify N from 1 to 30, which will give you the details of top N contributed variables " +
"in the anomaly results. For example, if you have 100 variables in the model, but you only care the top " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package com.microsoft.azure.synapse.ml.cognitive.anomaly

import com.microsoft.azure.synapse.ml.core.schema.SparkBindings
import spray.json.{DefaultJsonProtocol, RootJsonFormat}
import scala.collection.JavaConverters._

// DMA stands for DetectMultivariateAnomaly
object DMARequest extends SparkBindings[DMARequest]
Expand Down Expand Up @@ -33,7 +34,17 @@ case class DMAVariableState(variable: Option[String],
filledNARatio: Option[Double],
effectiveCount: Option[Int],
firstTimestamp: Option[String],
lastTimestamp: Option[String])
lastTimestamp: Option[String]) {
def getVariable: String = this.variable.get

def getFilledNARatio: Double = this.filledNARatio.get

def getEffectiveCount: Int = this.effectiveCount.get

def getFirstTimestamp: String = this.firstTimestamp.get

def getLastTimestamp: String = this.lastTimestamp.get
}

case class DMASetupInfo(dataSource: String,
topContributorCount: Option[Int],
Expand Down Expand Up @@ -84,12 +95,24 @@ case class MAEModelInfo(slidingWindow: Option[Int],

case class AlignPolicy(alignMode: Option[String], fillNAMethod: Option[String], paddingValue: Option[Int])

case class DiagnosticsInfo(modelState: Option[ModelState], variableStates: Option[Seq[DMAVariableState]])
case class DiagnosticsInfo(modelState: Option[ModelState], variableStates: Option[Seq[DMAVariableState]]) {
def getModelState: ModelState = this.modelState.get

def getVariableStates: java.util.List[DMAVariableState] = this.variableStates.get.asJava
}

case class ModelState(epochIds: Option[Seq[Int]],
trainLosses: Option[Seq[Double]],
validationLosses: Option[Seq[Double]],
latenciesInSeconds: Option[Seq[Double]])
latenciesInSeconds: Option[Seq[Double]]) {
def getEpochIds: java.util.List[Int] = this.epochIds.getOrElse(Seq()).asJava

def getTrainLosses: java.util.List[Double] = this.trainLosses.getOrElse(Seq()).asJava

def getValidationLosses: java.util.List[Double] = this.validationLosses.getOrElse(Seq()).asJava

def getLatenciesInSeconds: java.util.List[Double] = this.latenciesInSeconds.getOrElse(Seq()).asJava
}

object MADJsonProtocol extends DefaultJsonProtocol {
implicit val DMAReqEnc: RootJsonFormat[DMARequest] = jsonFormat4(DMARequest.apply)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import spray.json.{DefaultJsonProtocol, _}
import java.time.ZonedDateTime
import java.time.format.DateTimeFormatter
import scala.collection.mutable
import scala.collection.mutable.HashSet


case class MADListModelsResponse(models: Seq[MADModel],
Expand Down Expand Up @@ -147,6 +146,7 @@ class SimpleFitMultivariateAnomalySuite extends EstimatorFuzzing[SimpleFitMultiv
val smae = simpleMultiAnomalyEstimator
val model = smae.fit(df)
smae.cleanUpIntermediateData()
assert(model.getDiagnosticsInfo.variableStates.get.length.equals(3))

model.setStartTime(startTime)
.setEndTime(endTime)
Expand Down
84 changes: 79 additions & 5 deletions core/src/main/dotnet/src/Base/Schemas.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public sealed class TextAnalyzeTask : IJvmObjectReferenceProvider
public Dictionary<string, string> Parameters { get; init; }

public TextAnalyzeTask(Dictionary<string, string> parameters)
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.TextAnalyzeTask", parameters.ToJavaHashMap()))
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.text.TextAnalyzeTask", parameters.ToJavaHashMap()))
{
}

Expand Down Expand Up @@ -46,7 +46,7 @@ public sealed class TimeSeriesPoint : IJvmObjectReferenceProvider
public double Value { get; init; }

public TimeSeriesPoint(string timestamp, double value)
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.TimeSeriesPoint", timestamp, value))
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.anomaly.TimeSeriesPoint", timestamp, value))
{
}

Expand All @@ -68,7 +68,7 @@ public sealed class TextAndTranslation : IJvmObjectReferenceProvider
public string Translation { get; init; }

public TextAndTranslation(string text, string translation)
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.TextAndTranslation", text, translation))
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.translate.TextAndTranslation", text, translation))
{
}

Expand All @@ -92,7 +92,7 @@ public sealed class TargetInput : IJvmObjectReferenceProvider
public string? StorageSource { get; init; }

public TargetInput(string targetUrl, string language, string? category = null, Glossary[]? glossaries = null, string? storageSource = null)
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.TargetInput", targetUrl, language, category, glossaries, storageSource))
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.translate.TargetInput", targetUrl, language, category, glossaries, storageSource))
{
}

Expand Down Expand Up @@ -123,7 +123,7 @@ public sealed class Glossary : IJvmObjectReferenceProvider
public string? Version { get; init; }

public Glossary(string format, string glossaryUrl, string? storageSource = null, string? version = null)
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.Glossary", format, glossaryUrl, storageSource, version))
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.translate.Glossary", format, glossaryUrl, storageSource, version))
{
}

Expand Down Expand Up @@ -196,5 +196,79 @@ internal ICENumericFeature(JvmObjectReference jvmObject)
public int GetNumSplits() => (int)Reference.Invoke("getNumSplits");
}

public sealed class ModelState : IJvmObjectReferenceProvider
{
public int[]? EpochIds { get; init; }
public double[]? TrainLosses { get; init; }
public double[]? ValidationLosses { get; init; }
public double[]? LatenciesInSeconds { get; init; }

public ModelState(int[]? epochIds = null, double[]? trainLosses = null, double[]? validationLosses = null, double[]? latenciesInSeconds = null)
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.anomaly.ModelState", epochIds, trainLosses, validationLosses, latenciesInSeconds))
{
}

internal ModelState(JvmObjectReference jvmObject)
{
Reference = jvmObject;
this.EpochIds = (int[])Reference.Invoke("epochIds");
this.TrainLosses = (double[])Reference.Invoke("trainLosses");
this.ValidationLosses = (double[])Reference.Invoke("validationLosses");
this.LatenciesInSeconds = (double[])Reference.Invoke("latenciesInSeconds");
}

public JvmObjectReference Reference { get; init; }

}

public sealed class DMAVariableState : IJvmObjectReferenceProvider
{
public string? Variable { get; init; }
public double? FilledNARatio { get; init; }
public int? EffectiveCount { get; init; }
public string? FirstTimestamp { get; init; }
public string? LastTimestamp { get; init; }


public DMAVariableState(string? variable = null, double? filledNARatio = null, int? effectiveCount = null, string? firstTimestamp = null, string? lastTimestamp = null)
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.anomaly.DMAVariableState", variable, filledNARatio, effectiveCount, firstTimestamp, lastTimestamp))
{
}

internal DMAVariableState(JvmObjectReference jvmObject)
{
Reference = jvmObject;
this.Variable = (string)Reference.Invoke("variable");
this.FilledNARatio = (double)Reference.Invoke("filledNARatio");
this.EffectiveCount = (int)Reference.Invoke("effectiveCount");
this.FirstTimestamp = (string)Reference.Invoke("firstTimestamp");
this.LastTimestamp = (string)Reference.Invoke("lastTimestamp");
}

public JvmObjectReference Reference { get; init; }

}

public sealed class DiagnosticsInfo: IJvmObjectReferenceProvider
{
public ModelState? ModelState { get; init; }
public DMAVariableState? VariableStates { get; init; }

public DiagnosticsInfo(ModelState? modelState = null, DMAVariableState? variableStates = null)
: this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.cognitive.anomaly.DiagnosticsInfo", modelState, variableStates))
{
}

internal DiagnosticsInfo(JvmObjectReference jvmObject)
{
Reference = jvmObject;
this.ModelState = new ModelState((JvmObjectReference)Reference.Invoke("modelState"));
this.VariableStates = new DMAVariableState((JvmObjectReference)Reference.Invoke("variableStates"));
}

public JvmObjectReference Reference { get; init; }

}

#nullable disable
}
2 changes: 1 addition & 1 deletion core/src/main/dotnet/src/dotnetBase.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<IsPackable>true</IsPackable>

<Description>SynapseML .NET Base</Description>
<Version>0.9.1</Version>
<Version>0.11.0</Version>
</PropertyGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ object DotnetCodegen {
|
| <ItemGroup>
| <PackageReference Include="Microsoft.Spark" Version="2.1.1" />
| <PackageReference Include="SynapseML.DotnetBase" Version="0.9.1" />
| <PackageReference Include="SynapseML.DotnetBase" Version="0.11.0" />
| <PackageReference Include="IgnoresAccessChecksToGenerator" Version="0.4.0" PrivateAssets="All" />
| $newtonsoftDep
| </ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,22 @@ class CognitiveServiceStructParam[T: TypeTag](parent: Params,

override def rValue(v: T): String = RWrappableParam.rDefaultRender(v)

override private[ml] def dotnetGetter(capName: String): String = {
dotnetType match {
case "DiagnosticsInfo" =>
s"""|public $dotnetType Get$capName()
|{
| var jvmObject = (JvmObjectReference)Reference.Invoke(\"get$capName\");
| return new $dotnetType(jvmObject);
|}
|""".stripMargin
case _ =>
s"""|public $dotnetType Get$capName() =>
| ($dotnetType)Reference.Invoke(\"get$capName\");
|""".stripMargin
}
}

override private[ml] def dotnetTestValue(v: T): String = DotnetWrappableParam.dotnetDefaultRender(v)

override private[ml] def dotnetTestSetterLine(v: T): String = {
Expand All @@ -212,6 +228,7 @@ class CognitiveServiceStructParam[T: TypeTag](parent: Params,

private[ml] def dotnetType: String = typeOf[T].toString match {
case "Seq[com.microsoft.azure.synapse.ml.cognitive.text.TextAnalyzeTask]" => "TextAnalyzeTask[]"
case "com.microsoft.azure.synapse.ml.cognitive.anomaly.DiagnosticsInfo" => "DiagnosticsInfo"
case _ => throw new Exception(s"unsupported type ${typeOf[T].toString}, please add implementation")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ object DotnetTestGen {
| <IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
| </PackageReference>
| <PackageReference Include="Microsoft.Spark" Version="2.1.1" />
| <PackageReference Include="SynapseML.DotnetBase" Version="0.9.1" />
| <PackageReference Include="SynapseML.DotnetBase" Version="0.11.0" />
| <PackageReference Include="SynapseML.DotnetE2ETest" Version="${conf.dotnetVersion}" />
| <PackageReference Include="SynapseML.$curProject" Version="${conf.dotnetVersion}" />
| $referenceCore
Expand Down