Iris Classification with Logistic Regression (Spark MLlib)
Background
This example uses IDEA, Spark 3.4.1, sbt 1.9.3, and Spark MLlib to build a logistic regression model that classifies iris flowers. It is a classification-model case study that gives a quick introduction to how Spark MLlib classification models are used.
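At its core, the workflow follows one pattern: assemble Transformer/Estimator stages into a Pipeline, call fit() on the training DataFrame to obtain a PipelineModel, then call transform() on new data to append prediction columns. The sketch below only illustrates that pattern; the variable names and the training/test DataFrames are placeholders, not part of the example that follows.

import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.StringIndexer

// Illustrative sketch: `training` and `test` are assumed to be DataFrames
// with a string "label" column and a Vector-typed "features" column.
val indexer = new StringIndexer().setInputCol("label").setOutputCol("indexedLabel")
val lr = new LogisticRegression().setLabelCol("indexedLabel").setFeaturesCol("features")

val pipeline = new Pipeline().setStages(Array(indexer, lr)) // an Estimator
val model = pipeline.fit(training)                          // fit() yields a PipelineModel
val predictions = model.transform(test)                     // transform() appends prediction columns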
Dependencies (build.sbt)
ThisBuild / version := "0.1.0-SNAPSHOT"
ThisBuild / scalaVersion := "2.13.11"

lazy val root = (project in file("."))
  .settings(
    name := "SparkLearning",
    idePackagePrefix := Some("cn.lh.spark"),
    libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.4.1",
    libraryDependencies += "org.apache.spark" %% "spark-core" % "3.4.1",
    libraryDependencies += "org.apache.hadoop" % "hadoop-auth" % "3.3.6",
    libraryDependencies += "org.apache.spark" %% "spark-streaming" % "3.4.1",
    libraryDependencies += "org.apache.spark" %% "spark-streaming-kafka-0-10" % "3.4.1",
    libraryDependencies += "org.apache.spark" %% "spark-mllib" % "3.4.1",
    libraryDependencies += "mysql" % "mysql-connector-java" % "8.0.30"
  )

The full code is shown below.
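The program reads a plain-text file iris.txt and splits each row on commas: four numeric measurements followed by the species name. Assuming the standard UCI iris layout (sepal length, sepal width, petal length, petal width, species), a few rows look like this:

5.1,3.5,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica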
package cn.lh.spark

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, StringIndexerModel, VectorIndexer, VectorIndexerModel}
import org.apache.spark.ml.linalg.{Vectors, Vector}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

case class Iris(features: org.apache.spark.ml.linalg.Vector, label: String)

/**
 * Binomial logistic regression applied to a two-class classification problem.
 */
object MLlibLogisticRegression {

  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder().master("local[2]")
      .appName("Spark MLlib Demo List").getOrCreate()

    // Read the raw iris data and map each comma-separated line to an Iris case class
    val irisRDD: RDD[Iris] = spark.sparkContext.textFile("F:\\niit\\2023\\2023_2\\Spark\\codes\\data\\iris.txt")
      .map(_.split(",")).map(p => Iris(Vectors.dense(p(0).toDouble, p(1).toDouble,
        p(2).toDouble, p(3).toDouble), p(4).toString()))

    import spark.implicits._
    val data: DataFrame = irisRDD.toDF()
    data.show()

    data.createOrReplaceTempView("iris")
    // Keep only two species so that this becomes a binary classification problem
    val df: DataFrame = spark.sql("select * from iris where label != 'Iris-setosa'")
    df.map(t => t(1) + ":" + t(0)).collect().foreach(println)

    // Build the ML pipeline stages: index the string labels and the feature vector
    val labelIndex: StringIndexerModel = new StringIndexer().setInputCol("label")
      .setOutputCol("indexedLabel").fit(df)
    val featureIndexer: VectorIndexerModel = new VectorIndexer().setInputCol("features")
      .setOutputCol("indexedFeatures").fit(df)

    // Split the dataset into training and test sets
    val Array(trainingData, testData) = df.randomSplit(Array(0.7, 0.3))

    // Configure the logistic regression parameters
    val lr: LogisticRegression = new LogisticRegression().setLabelCol("indexedLabel")
      .setFeaturesCol("indexedFeatures").setMaxIter(10).setRegParam(0.3).setElasticNetParam(0.8)

    // labelConverter maps the predicted indices back to the original string labels
    val labelConverter: IndexToString = new IndexToString().setInputCol("prediction")
      .setOutputCol("predictedLabel").setLabels(labelIndex.labels)

    // Assemble the pipeline, set its stages, and call fit() to train the model
    val lrPipeline: Pipeline = new Pipeline().setStages(Array(labelIndex, featureIndexer, lr, labelConverter))
    val lrModel: PipelineModel = lrPipeline.fit(trainingData)

    val lrPredictions: DataFrame = lrModel.transform(testData)
    lrPredictions.select("predictedLabel", "label", "features", "probability")
      .collect().foreach { case Row(predictedLabel: String, label: String, features: Vector, prob: Vector) =>
        println(s"($label, $features) --> prob=$prob, predicted Label=$predictedLabel")
      }

    // Evaluate the model
    val evaluator: MulticlassClassificationEvaluator = new MulticlassClassificationEvaluator()
      .setLabelCol("indexedLabel").setPredictionCol("prediction")
    val lrAccuracy: Double = evaluator.evaluate(lrPredictions)
    println("Test Error = " + (1.0 - lrAccuracy))

    // Inspect the LogisticRegressionModel trained inside the pipeline
    val lrModel2: LogisticRegressionModel = lrModel.stages(2).asInstanceOf[LogisticRegressionModel]
    println("Coefficients: " + lrModel2.coefficients + "  Intercept: " + lrModel2.intercept +
      "  numClasses: " + lrModel2.numClasses + "  numFeatures: " + lrModel2.numFeatures)

    spark.stop()
  }
}

The output of the run is as follows:
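Beyond the printed predictions and metrics, the fitted PipelineModel can also score new measurements directly. The snippet below is a minimal sketch, assuming the lrModel value, the Iris case class, and the spark.implicits._ import from the code above; the label value is only a placeholder, needed because the pipeline's StringIndexer stage expects a label column to be present.

// Hypothetical follow-up (not part of the original example):
// classify one new flower with the trained pipeline.
val newSample = Seq(
  Iris(Vectors.dense(6.1, 2.8, 4.7, 1.2), "Iris-versicolor") // placeholder label
).toDF()

lrModel.transform(newSample)
  .select("features", "probability", "predictedLabel")
  .show(truncate = false)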