Untitled

You might also like

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 1

import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.regression._
// Load the raw text file; `sc` is the SparkContext supplied by spark-shell.
val rawData = sc.textFile("covtype.data")
rawData.first()

// Parse every comma-separated line into a LabeledPoint:
//   - all columns except the last become a dense feature vector
//   - the last column is the label, shifted down by one
//     (presumably the file's labels are 1-based while MLlib classifiers
//     expect labels starting at 0 -- confirm against the data set description)
val data = rawData.map { line =>
  val values = line.split(",").map(_.toDouble)
  val featureVector = Vectors.dense(values.init)
  val label = values.last - 1
  LabeledPoint(label, featureVector)
}
data.first()

// 80% training / 10% cross-validation / 10% test.
// No seed is passed to randomSplit, so the split differs from run to run.
val Array(trainData, cvData, testData) =
  data.randomSplit(Array(0.8, 0.1, 0.1))

// Each subset is read several times below, so keep them in memory.
trainData.cache()
cvData.cache()
testData.cache()

// Sanity checks on the split.
trainData.first()
trainData.count()
cvData.count()
testData.count()

import org.apache.spark.mllib.evaluation._
import org.apache.spark.mllib.tree._
import org.apache.spark.mllib.tree.model._
import org.apache.spark.rdd._
/**
 * Evaluates a decision-tree model against a labelled data set.
 *
 * Every example's features are run through `model.predict`, and the
 * resulting (prediction, true label) pairs are handed to
 * [[MulticlassMetrics]].
 *
 * @param model the trained decision tree to evaluate
 * @param data  labelled examples to score the model on
 * @return multiclass metrics built from the (prediction, label) pairs
 */
def getMetrics(model: DecisionTreeModel, data: RDD[LabeledPoint]): MulticlassMetrics = {
  val predictionsAndLabels = data.map { example =>
    (model.predict(example.features), example.label)
  }
  new MulticlassMetrics(predictionsAndLabels)
}
// Train a decision-tree classifier on the training split.
val model = DecisionTree.trainClassifier(
  trainData,
  numClasses = 7,
  categoricalFeaturesInfo = Map[Int, Int](), // empty map: no feature declared categorical
  impurity = "gini",
  maxDepth = 4,
  maxBins = 100)

// Score the model on the cross-validation split.
val metrics = getMetrics(model, cvData)
metrics.precision(0)
metrics
metrics.accuracy
metrics.weightedPrecision

// Bind the confusion matrix once instead of re-evaluating
// `metrics.confusionMatrix` for every cell that is read below.
val cm = metrics.confusionMatrix
val arr = cm.toArray
val total = arr.sum // sum of all cells of the confusion matrix

// Labels seen by the metrics object. Per the MulticlassMetrics documentation the
// confusion matrix is ordered by these labels, ascending, with predicted classes
// in columns -- so row i is taken to belong to labels(i).
val labels = metrics.labels

// Row 0 of the matrix: its cells, their sum, and that sum as a share of the total.
val firstRow = (0 until cm.numCols).map(cm(0, _))
firstRow.sum
firstRow.sum / total

// p(i): share of the total that falls in row i of the confusion matrix.
val p = labels.indices.map(i => (0 until cm.numCols).map(cm(i, _)).sum / total)

// Per-class precision weighted by p; compare with the library's own value below.
val overallPrecision = labels.indices.map(i => metrics.precision(labels(i)) * p(i)).sum
metrics.weightedPrecision

You might also like