import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import DocTable from "@theme/DocumentationTable";
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark`, `getSecret`, `flatten` and `col` are not imported
# in this snippet; presumably the notebook environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))

# One-row DataFrame whose "source" column holds the URL of the image to analyze.
imageDf = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg",)],
    ["source"],
)

# Layout analyzer: reads image URLs from "source", writes results to "layout".
analyzeLayout = (
    AnalyzeLayout()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("source")
    .setOutputCol("layout")
    .setConcurrency(5)
)

# Flatten the nested result and keep only the text of the read lines
# ("readLayout") and of the table cells ("pageLayout").
layoutDf = (
    analyzeLayout.transform(imageDf)
    .withColumn("lines", flatten(col("layout.analyzeResult.readResults.lines")))
    .withColumn("readLayout", col("lines.text"))
    .withColumn("tables", flatten(col("layout.analyzeResult.pageResults.tables")))
    .withColumn("cells", flatten(col("tables.cells")))
    .withColumn("pageLayout", col("cells.text"))
    .select("source", "readLayout", "pageLayout")
)
layoutDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// One-row DataFrame whose "source" column holds the URL of the image to analyze.
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg"
).toDF("source")

// Layout analyzer: reads image URLs from "source", writes results to "layout".
val analyzeLayout = (new AnalyzeLayout()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("layout")
  .setConcurrency(5))

// Run the transformer and print the resulting rows.
analyzeLayout.transform(imageDf).show()
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark` and `getSecret` are not imported in this snippet;
# presumably the notebook environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))

# Two rows, both pointing at the same receipt image, in a column named "image".
imageDf = spark.createDataFrame(
    [
        ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",),
        ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",),
    ],
    ["image"],
)

# Receipt analyzer: reads image URLs from "image", writes results to "receipts".
analyzeReceipts = (
    AnalyzeReceipts()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("image")
    .setOutputCol("receipts")
    .setConcurrency(5)
)

# Run the transformer and print the resulting rows.
receiptsDf = analyzeReceipts.transform(imageDf)
receiptsDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Two rows, both pointing at the same receipt image, in a column named "source".
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png"
).toDF("source")

// Receipt analyzer: reads image URLs from "source", writes results to "receipts".
val analyzeReceipts = (new AnalyzeReceipts()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("receipts")
  .setConcurrency(5))

// Run the transformer and print the resulting rows.
analyzeReceipts.transform(imageDf).show()
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark` and `getSecret` are not imported in this snippet;
# presumably the notebook environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))

# One-row DataFrame whose "source" column holds the URL of the business card image.
imageDf = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/business_card.jpg",)],
    ["source"],
)

# Business card analyzer: reads URLs from "source", writes to "businessCards".
analyzeBusinessCards = (
    AnalyzeBusinessCards()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("source")
    .setOutputCol("businessCards")
    .setConcurrency(5)
)

# Run the transformer and print the resulting rows.
businessCardsDf = analyzeBusinessCards.transform(imageDf)
businessCardsDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// One-row DataFrame whose "source" column holds the URL of the business card image.
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/business_card.jpg"
).toDF("source")

// Business card analyzer: reads URLs from "source", writes to "businessCards".
val analyzeBusinessCards = (new AnalyzeBusinessCards()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("businessCards")
  .setConcurrency(5))

// Run the transformer and print the resulting rows.
analyzeBusinessCards.transform(imageDf).show()
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark`, `getSecret`, `explode` and `col` are not imported
# in this snippet; presumably the notebook environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))

# One-row DataFrame whose "source" column holds the URL of the invoice image.
imageDf = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png",)],
    ["source"],
)

# Invoice analyzer: reads image URLs from "source", writes results to "invoices".
analyzeInvoices = (
    AnalyzeInvoices()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("source")
    .setOutputCol("invoices")
    .setConcurrency(5)
)

# Explode the per-document field collections into a "documents" column and
# show them next to the source URL.
invoicesDf = (
    analyzeInvoices.transform(imageDf)
    .withColumn("documents", explode(col("invoices.analyzeResult.documentResults.fields")))
    .select("source", "documents")
)
invoicesDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// One-row DataFrame whose "source" column holds the URL of the invoice image.
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png"
).toDF("source")

// Invoice analyzer: reads image URLs from "source", writes results to "invoices".
val analyzeInvoices = (new AnalyzeInvoices()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("invoices")
  .setConcurrency(5))

// Run the transformer on the DataFrame defined above and print the rows.
analyzeInvoices.transform(imageDf).show()
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark`, `getSecret`, `explode` and `col` are not imported
# in this snippet; presumably the notebook environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))

# One-row DataFrame whose "source" column holds the URL of the ID document image.
imageDf = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/id1.jpg",)],
    ["source"],
)

# ID document analyzer: reads image URLs from "source", writes results to "ids".
analyzeIDDocuments = (
    AnalyzeIDDocuments()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("source")
    .setOutputCol("ids")
    .setConcurrency(5)
)

# Explode the per-document field collections into a "documents" column and
# show them next to the source URL.
idsDf = (
    analyzeIDDocuments.transform(imageDf)
    .withColumn("documents", explode(col("ids.analyzeResult.documentResults.fields")))
    .select("source", "documents")
)
idsDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// One-row DataFrame whose "source" column holds the URL of the ID document image.
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/id1.jpg"
).toDF("source")

// ID document analyzer: reads image URLs from "source", writes results to "ids".
val analyzeIDDocuments = (new AnalyzeIDDocuments()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("ids")
  .setConcurrency(5))

// Run the transformer and print the resulting rows.
analyzeIDDocuments.transform(imageDf).show()
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark`, `getSecret`, `flatten`, `col`, `lit` and
# `create_map` are not imported in this snippet; presumably the notebook
# environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8"  # put your own modelId here

# One-row DataFrame whose "source" column holds the URL of the image to analyze.
imageDf = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png",)],
    ["source"],
)

# Custom-model analyzer: applies the model identified by modelId to the URLs in
# "source" and writes results to "output".
analyzeCustomModel = (
    AnalyzeCustomModel()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setModelId(modelId)
    .setImageUrlCol("source")
    .setOutputCol("output")
    .setConcurrency(5)
)

# Flatten the key/value pairs, pull out the key and value texts, then replace
# "keyValuePairs" with a map of {"key": keys, "value": values}.
customModelDf = (
    analyzeCustomModel.transform(imageDf)
    .withColumn("keyValuePairs", flatten(col("output.analyzeResult.pageResults.keyValuePairs")))
    .withColumn("keys", col("keyValuePairs.key.text"))
    .withColumn("values", col("keyValuePairs.value.text"))
    .withColumn("keyValuePairs", create_map(lit("key"), col("keys"), lit("value"), col("values")))
    .select("source", "keyValuePairs")
)
customModelDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))
val modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" // put your own modelId here

// One-row DataFrame whose "source" column holds the URL of the image to analyze.
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png"
).toDF("source")

// Custom-model analyzer: applies the model identified by modelId to the URLs in
// "source" and writes results to "output".
val analyzeCustomModel = (new AnalyzeCustomModel()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setModelId(modelId)
  .setImageUrlCol("source")
  .setOutputCol("output")
  .setConcurrency(5))

// Run the transformer and print the resulting rows.
analyzeCustomModel.transform(imageDf).show()
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark`, `getSecret` and `col` are not imported in this
# snippet; presumably the notebook environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8"  # put your own modelId here

# Single-row DataFrame holding an empty string; it only serves as input for
# transform() below.
emptyDf = spark.createDataFrame([("",)])

# Model lookup: queries the model identified by modelId (keys included) and
# writes the response to "model".
getCustomModel = (
    GetCustomModel()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setModelId(modelId)
    .setIncludeKeys(True)
    .setOutputCol("model")
    .setConcurrency(5)
)

# Surface the model info and training result as top-level columns.
modelDf = (
    getCustomModel.transform(emptyDf)
    .withColumn("modelInfo", col("model.ModelInfo"))
    .withColumn("trainResult", col("model.TrainResult"))
    .select("modelInfo", "trainResult")
)
modelDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))
val modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" // put your own modelId here

// Single-row DataFrame holding an empty string; it only serves as input for
// transform() below.
val emptyDf = Seq("").toDF()

// Model lookup: queries the model identified by modelId (keys included) and
// writes the response to "model".
val getCustomModel = (new GetCustomModel()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setModelId(modelId)
  .setIncludeKeys(true)
  .setOutputCol("model")
  .setConcurrency(5))

// Run the transformer and print the resulting rows.
getCustomModel.transform(emptyDf).show()
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark`, `getSecret` and `col` are not imported in this
# snippet; presumably the notebook environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))

# Single-row DataFrame holding an empty string; it only serves as input for
# transform() below.
emptyDf = spark.createDataFrame([("",)])

# Model listing: op is set to "full" and the response is written to "models".
listCustomModels = (
    ListCustomModels()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setOp("full")
    .setOutputCol("models")
    .setConcurrency(5)
)

# Keep only the model ids from the returned model list.
modelIdsDf = (
    listCustomModels.transform(emptyDf)
    .withColumn("modelIds", col("models.modelList.modelId"))
    .select("modelIds")
)
modelIdsDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Single-row DataFrame holding an empty string; it only serves as input for
// transform() below.
val emptyDf = Seq("").toDF()

// Model listing: op is set to "full" and the response is written to "models".
val listCustomModels = (new ListCustomModels()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setOp("full")
  .setOutputCol("models")
  .setConcurrency(5))

// Run the transformer and print the resulting rows.
listCustomModels.transform(emptyDf).show()
<Tabs
defaultValue="py"
values={[
{label: "Python", value: "py"},
{label: "Scala", value: "scala"},
]}>
# Wildcard import from SynapseML's cognitive package.
# NOTE(review): `os`, `spark`, `getSecret`, `flatten` and `col` are not imported
# in this snippet; presumably the notebook environment provides them — confirm.
from synapse.ml.cognitive import *

# Use COGNITIVE_API_KEY from the environment when it is set; otherwise use the
# value returned by getSecret("cognitive-api-key").
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))

# One-row DataFrame whose "source" column holds the URL of the image to analyze.
imageDf = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg",)],
    ["source"],
)

# Document analyzer using the "prebuilt-layout" model: reads image URLs from
# "source" and writes results to "result".
analyzeDocument = (
    AnalyzeDocument()
    # For supported prebuilt models, please go to documentation page for details
    .setPrebuiltModelId("prebuilt-layout")
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("source")
    .setOutputCol("result")
    .setConcurrency(5)
)

# Expose the document content and the content of every table cell alongside
# the raw result.
documentDf = (
    analyzeDocument.transform(imageDf)
    .withColumn("content", col("result.analyzeResult.content"))
    .withColumn("cells", flatten(col("result.analyzeResult.tables.cells")))
    .withColumn("cells", col("cells.content"))
    .select("source", "result", "content", "cells")
)
documentDf.show()
import com.microsoft.azure.synapse.ml.cognitive._
import spark.implicits._

// Read the service key from the environment and fail fast with a clear message
// when it is missing. The default must not be `None`: that widens the inferred
// type away from String, and setSubscriptionKey expects a String.
val cognitiveKey: String = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// One-row DataFrame whose "source" column holds the URL of the image to analyze.
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg"
).toDF("source")

// Document analyzer using the "prebuilt-layout" model: reads image URLs from
// "source" and writes results to "result".
val analyzeDocument = (new AnalyzeDocument()
  .setPrebuiltModelId("prebuilt-layout")
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("result")
  .setConcurrency(5))

// Run the transformer and print the resulting rows.
analyzeDocument.transform(imageDf).show()