// Let's see a basic usage of pretrained pipelines
//import com.johnsnowlabs.nlp.pretrained.pipelines.en.BasicPipeline
import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline

PretrainedPipeline("explain_document_ml").annotate("Please parse this sentence. Thanks")
import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
res0: Map[String,Seq[String]] = Map(checked -> List(Please, parse, this, sentence, ., Thanks), document -> List(Please parse this sentence. Thanks), pos -> ArrayBuffer(VB, NN, DT, NN, ., NNS), lemmas -> List(Please, parse, this, sentence, ., Thanks), token -> List(Please, parse, this, sentence, ., Thanks), stems -> List(pleas, pars, thi, sentenc, ., thank), sentence -> List(Please parse this sentence., Thanks))
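Since annotate on a single string returns a Map[String, Seq[String]], individual annotation types can be pulled out by key. A minimal sketch (key names taken from the output above; the val names are just for illustration):

import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline

// Re-run the pipeline and keep the result so we can index into it by key
val result = PretrainedPipeline("explain_document_ml").annotate("Please parse this sentence. Thanks")
val tokens  = result("token")   // token strings
val posTags = result("pos")     // part-of-speech tags, aligned with the tokens
tokens.zip(posTags).foreach { case (t, p) => println(s"$t -> $p") }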
val annotations = PretrainedPipeline("explain_document_ml").annotate(Array("We are very happy about SparkNLP", "And this is just another sentence"))

annotations.foreach(println(_, "\n"))
(Map(checked -> List(We, are, very, happy, about, SparkNLP), document -> List(We are very happy about SparkNLP), pos -> ArrayBuffer(PRP, VBP, RB, JJ, IN, NNP), lemmas -> List(We, be, very, happy, about, SparkNLP), token -> List(We, are, very, happy, about, SparkNLP), stems -> List(we, ar, veri, happi, about, sparknlp), sentence -> List(We are very happy about SparkNLP)),
)
(Map(checked -> List(And, this, is, just, another, sentence), document -> List(And this is just another sentence), pos -> ArrayBuffer(CC, DT, VBZ, RB, DT, NN), lemmas -> List(And, this, be, just, another, sentence), token -> List(And, this, is, just, another, sentence), stems -> List(and, thi, i, just, anoth, sentenc), sentence -> List(And this is just another sentence)),
)
annotations: Array[Map[String,Seq[String]]] = Array(Map(checked -> List(We, are, very, happy, about, SparkNLP), document -> List(We are very happy about SparkNLP), pos -> ArrayBuffer(PRP, VBP, RB, JJ, IN, NNP), lemmas -> List(We, be, very, happy, about, SparkNLP), token -> List(We, are, very, happy, about, SparkNLP), stems -> List(we, ar, veri, happi, about, sparknlp), sentence -> List(We are very happy about SparkNLP)), Map(checked -> List(And, this, is, just, another, sentence), document -> List(And this is just another sentence), pos -> ArrayBuffer(CC, DT, VBZ, RB, DT, NN), lemmas -> List(And, this, be, just, another, sentence), token -> List(And, this, is, just, another, sentence), stems -> List(and, thi, i, just, anoth, sentenc), sentence -> List(And this is just another sentence)))
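The batch call returns one Map per input string, so the outputs can be paired back with the inputs. A small sketch, assuming the annotations value from the cell above is still in scope:

// Pair each input sentence with its lemmas from the batch result
val inputs = Array("We are very happy about SparkNLP", "And this is just another sentence")
inputs.zip(annotations).foreach { case (text, ann) =>
  println(s"$text -> ${ann("lemmas").mkString(" ")}")
}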
// How about annotating the entire DataFrame
import spark.implicits._

val data = Seq("hello, this is an example sentence").toDF("mainColumn")

PretrainedPipeline("explain_document_ml").annotate(data, "mainColumn").show()
+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
| text| document| sentence| token| checked| lemmas| stems| pos|
+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|hello, this is an...|[[document, 0, 33...|[[document, 0, 33...|[[token, 0, 4, he...|[[token, 0, 4, he...|[[token, 0, 4, he...|[[token, 0, 4, he...|[[pos, 0, 4, UH, ...|
+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
import spark.implicits._
data: org.apache.spark.sql.DataFrame = [mainColumn: string]
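The DataFrame variant keeps the full annotation structs in each column. A sketch for flattening them into plain string arrays, assuming the standard Spark NLP annotation schema where each struct exposes a result field (the output column names are only illustrative), and that data from the cell above is still in scope:

import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline

// Keep the annotated DataFrame and project just the string results
val annotated = PretrainedPipeline("explain_document_ml").annotate(data, "mainColumn")
annotated.selectExpr("token.result as tokens", "pos.result as pos_tags").show(false)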
import com.johnsnowlabs.nlp.pretrained.pipelines.en.AdvancedPipeline

// Annotate with AdvancedPipeline: this may take some time to download everything you need, but only the first time :)
AdvancedPipeline().annotate("Please parse this sentence. Thanks")

AdvancedPipeline().annotate(Array("We are very happy about SparkNLP", "And this is just another sentence"))
notebook:1: error: object pipelines is not a member of package com.johnsnowlabs.nlp.pretrained
import com.johnsnowlabs.nlp.pretrained.pipelines.en.AdvancedPipeline
^
notebook:5: error: not found: value AdvancedPipeline
AdvancedPipeline().annotate(Array("We are very happy about SparkNLP", "And this is just another sentence"))
^
notebook:4: error: not found: value AdvancedPipeline
AdvancedPipeline().annotate("Please parse this sentence. Thanks")
^
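The errors above indicate that the com.johnsnowlabs.nlp.pretrained.pipelines.en package is not available in the Spark NLP version used here. A hedged workaround sketch, assuming the pretrained "explain_document_dl" pipeline is the closest current equivalent of AdvancedPipeline:

import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline

// Downloads the deep-learning explain_document pipeline on first use (may take a while),
// then annotates the same examples as above
val advanced = PretrainedPipeline("explain_document_dl")
advanced.annotate("Please parse this sentence. Thanks")
advanced.annotate(Array("We are very happy about SparkNLP", "And this is just another sentence"))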
This is a minor augmentation/update of: