pyspark线性回归(弹性网)
2021-08-27 本文已影响0人
米斯特芳
没什么好解释的,上代码
from pyspark.ml.regression import LinearRegression
from pyspark.sql import SparkSession
# Obtain a Spark session for this example (getOrCreate hands back an existing
# session if one is already active, otherwise it builds a new one).
spark = (
    SparkSession.builder
    .appName("LinearRegressionWithElasticNet")
    .getOrCreate()
)

# Load the training data, stored in LIBSVM format, from a relative path.
training = (
    spark.read
    .format("libsvm")
    .load("sample_linear_regression_data.txt")
)
# Estimator options worth knowing:
#   loss:   "squaredError" or "huber"
#   solver: "auto", "normal" or "l-bfgs"
#   elasticNetParam: sets the mix between the L1 and L2 penalties; 0 means
#       pure L2 and 1 means pure L1. The L1 weight is
#       regParam * elasticNetParam and the L2 weight is
#       regParam * (1 - elasticNetParam).
lr = LinearRegression(
    maxIter=10,
    regParam=0.3,
    elasticNetParam=0.8,
)

# Fit on the training DataFrame and report the learned parameters.
lrModel = lr.fit(training)
print("Coefficients: {}".format(lrModel.coefficients))
print("Intercept: {}".format(lrModel.intercept))
# Summarize the model over the training set and print out some metrics.
# The report runs inside try/finally so the Spark session is always stopped,
# even if building or printing the summary raises; previously the session was
# never stopped and its resources were left to be reclaimed at interpreter exit.
try:
    trainingSummary = lrModel.summary
    # Optimizer progress: iteration count and the objective value history.
    print("numIterations: %d" % trainingSummary.totalIterations)
    print("objectiveHistory: %s" % str(trainingSummary.objectiveHistory))
    # residuals is a DataFrame; show() prints its leading rows to stdout.
    trainingSummary.residuals.show()
    # Goodness-of-fit metrics computed on the training data.
    print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
    print("r2: %f" % trainingSummary.r2)
finally:
    # Shut down the underlying SparkContext and release its resources.
    spark.stop()