Note_Tech

All technological notes.


Project maintained by simonangel-fong Hosted on GitHub Pages — Theme by mattgraham

Machine Learning - Regression

back


Regression


Linear Regression


Example: Predict car speed from age

import matplotlib.pyplot as plt
from scipy import stats

# Training data: car ages (x) and the speeds observed for them (y).
age_list = [5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6]
speed_list = [99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86]

# Fit a straight line through the data; the result unpacks into the slope,
# the intercept, the correlation coefficient r, the p-value and the
# standard error of the slope.
regression = stats.linregress(age_list, speed_list)
slope, intercept, r, p, std_err = regression

# Report every statistic of the fit, one per line.
for label, value in (
    ("slope", slope),
    ("intercept", intercept),
    ("rvalue", r),
    ("pvalue", p),
    ("Standard error", std_err),
):
    print(label, value)

# Output:
# slope -1.7512877115526118
# intercept 103.10596026490066
# rvalue -0.758591524376155
# pvalue 0.002646873922456106
# Standard error 0.453536157607742

def predict_speed(x_age):
    """Predict the speed for a given age from the fitted regression line.

    Uses the module-level ``slope`` and ``intercept`` produced by the
    regression fit. The returned value is where the given x value
    (``x_age``) lands on the y-axis of the fitted line, so the function can
    also be used to predict values for ages that are not in the data.
    """
    predicted_speed = intercept + slope * x_age
    return predicted_speed


# Evaluate the fitted line at every age in the x data; the resulting list
# holds the y-axis values of the regression line.
predict_speed_list = [predict_speed(age) for age in age_list]

# Observed data as a scatter plot, with the regression line drawn in red.
plt.scatter(age_list, speed_list)
plt.plot(age_list, predict_speed_list, color='red')

# Display the diagram.
plt.show()

linear_regression


Example: Bad Fit

# Bad Fit
import matplotlib.pyplot as plt
from scipy import stats

# Data with no clear linear relationship between x (age) and y (speed).
age_list = [
    89, 43, 36, 36, 95, 10, 66, 34, 38, 20,
    26, 29, 48, 64, 6, 5, 36, 66, 72, 40,
]
speed_list = [
    21, 46, 3, 35, 67, 95, 53, 72, 58, 10,
    26, 34, 90, 33, 38, 20, 56, 2, 47, 15,
]

# Fit a straight line anyway; the result unpacks into the slope, the
# intercept, the correlation coefficient r, the p-value and the standard
# error of the slope.
regression = stats.linregress(age_list, speed_list)
slope, intercept, r, p, std_err = regression

# Report every statistic of the fit, one per line.
for label, value in (
    ("slope", slope),
    ("intercept", intercept),
    ("rvalue", r),
    ("pvalue", p),
    ("Standard error", std_err),
):
    print(label, value)

# Output (an rvalue this close to 0 indicates a bad fit):
# slope 0.01391658139845263
# intercept 40.452282828936454
# rvalue 0.01331814154297491
# pvalue 0.955558800440106
# Standard error 0.24627150586388075

def predict_speed(x_age):
    """Return the y value of the fitted regression line at ``x_age``.

    Reads the module-level ``slope`` and ``intercept`` produced by the
    regression fit.
    """
    predicted_speed = intercept + slope * x_age
    return predicted_speed


# Evaluate the fitted line at every age in the x data.
predict_speed_list = [predict_speed(age) for age in age_list]

# Observed data as a scatter plot, with the regression line drawn in red.
plt.scatter(age_list, speed_list)
plt.plot(age_list, predict_speed_list, color='red')

# Display the diagram.
plt.show()

linear_regression_bad_fit


Polynomial Regression 多项式回归

# How well does my data fit in a polynomial regression?
import numpy
from sklearn.metrics import r2_score

age_list = [1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 21, 22]
speed_list = [
    100, 90, 80, 60, 60, 55, 60, 65, 70,
    70, 75, 76, 78, 79, 90, 99, 99, 100,
]

# Least-squares fit of a degree-3 polynomial; poly1d wraps the coefficients
# so the model can be called like a function.
coefficients = numpy.polyfit(age_list, speed_list, 3)
predict_model = numpy.poly1d(coefficients)

# Compute r-squared between the observed speeds and the model's values at
# the same ages.
fitted_speed_list = predict_model(age_list)
print(r2_score(speed_list, fitted_speed_list))
# 0.9432150416451026

# The result 0.94 shows that there is a very good relationship, and we can use polynomial regression in future predictions.



Example: Predict car speed from age

import numpy
import matplotlib.pyplot as plt

age_list = [1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 21, 22]
speed_list = [
    100, 90, 80, 60, 60, 55, 60, 65, 70,
    70, 75, 76, 78, 79, 90, 99, 99, 100,
]

# Build the polynomial model. The last argument of polyfit is the degree,
# so this is a cubic: four coefficients, highest power 3.
coefficients = numpy.polyfit(age_list, speed_list, 3)
predict_model = numpy.poly1d(coefficients)

# x positions used to draw the curve: 100 evenly spaced values starting at
# position 1 and ending at position 22.
hypothesized_age = numpy.linspace(1, 22, 100)
# Predicted speed at each of those positions.
predict_speed_list = predict_model(hypothesized_age)

# Scatter plot of the training data, with the polynomial regression curve
# drawn in red.
plt.scatter(age_list, speed_list)
plt.plot(hypothesized_age, predict_speed_list, color='red')

# Display the diagram.
plt.show()

polynomial_regression


Example: Bad Fit

import numpy
import matplotlib.pyplot as plt
# r2_score is called at the end of this snippet; without this import the
# snippet raises NameError when run on its own.
from sklearn.metrics import r2_score

# Data with no clear relationship between x (age) and y (speed).
age_list = [89, 43, 36, 36, 95, 10, 66, 34, 38, 20,
            26, 29, 48, 64, 6, 5, 36, 66, 72, 40]
speed_list = [21, 46, 3, 35, 67, 95, 53, 72, 58, 10,
              26, 34, 90, 33, 38, 20, 56, 2, 47, 15]

# Least-squares fit of a degree-3 polynomial, wrapped in poly1d so the model
# can be called like a function.
predict_model = numpy.poly1d(numpy.polyfit(age_list, speed_list, 3))

# x positions used to draw the curve: 100 evenly spaced values from 2 to 95.
assum_age_list = numpy.linspace(2, 95, 100)

# Scatter plot of the data with the fitted curve on top.
plt.scatter(age_list, speed_list)
plt.plot(assum_age_list, predict_model(assum_age_list))
plt.show()


# r-squared between the observed speeds and the model's values at the same
# ages; a value this close to 0 indicates a bad fit.
print(r2_score(speed_list, predict_model(age_list)))
# 0.009952707566680541

polynomial_regression_bad_fit


Multiple Regression


Example: Predict CO2 emission with Weight and Volume

# Example: Predict CO2 emission with Weight and Volume

import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model

FILE_PATH = "./data.csv"
FEATURE_COLUMNS = ["Weight", "Volume"]
TARGET_COLUMNS = ["CO2"]

data = pd.read_csv(FILE_PATH)

# Handy while exploring the file:
# print(data.info())      # Print a concise summary of the DataFrame.
# print(data.columns)     # Print the column names.
# data.shape              # original note: 36 rows (heading included) and 5 columns

# .values returns the cell values only (the heading is excluded).
x_list = data[FEATURE_COLUMNS].values
# Selecting with a list keeps CO2 as a single-column 2-D array, which is why
# the coefficients below also come back as a 2-D array.
y_list = data[TARGET_COLUMNS].values
# print(x_list)
# print(y_list)

# Ordinary least squares (OLS) is a linear least-squares method for
# estimating the unknown parameters of a linear regression model.
# LinearRegression() returns an object of the ordinary least squares
# linear regression class.
predict_model = linear_model.LinearRegression()
# print(type(predict_model))   # <class 'sklearn.linear_model._base.LinearRegression'>
# print(isinstance(predict_model, linear_model.LinearRegression))   # True

# Fit the linear model: the first argument is the training data, the second
# the target values.
predict_model.fit(x_list, y_list)

# Estimated coefficients for the linear regression problem, one per feature
# column in the order given in FEATURE_COLUMNS.
coefficients_list = predict_model.coef_
weight_coefficient, volume_coefficient = coefficients_list[0]

print("Coefficients: ",coefficients_list)        # [[0.00755095 0.00780526]]
# If the weight increases by 1 kg, the CO2 emission increases by 0.00755095 g.
print("Weight coefficient: ",weight_coefficient)
# If the engine size (Volume) increases by 1 cm3, the CO2 emission increases
# by 0.00780526 g.
print("Volume coefficient: ",volume_coefficient)

# predict() takes an array-like of feature rows and returns the predicted
# values; here a single car with Weight 3300 and Volume 1300.
predict_value = predict_model.predict([[3300, 1300]])
# print("predict_value",predict_value)            #  [[114.75968007]]

# Predicted values for the training data itself.
predict_list = predict_model.predict(x_list)
print("predict_list: ",predict_list)


Scale Standardization

# Scale Standardization

import pandas
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
stdScale = StandardScaler()

df = pandas.read_csv("data.csv")

# Feature columns as a plain array of values (heading excluded).
feature_frame = df[['Weight', 'Volume']]
x_list = feature_frame.values

# Fit the scaler on the features and return the standardized values in one
# step.
scaled_xlist = stdScale.fit_transform(x_list)

# Show the whole scaled array, then its first row.
first_scaled = scaled_xlist[0]
print("scaled_xlist:", scaled_xlist)
print("first_scaled:", first_scaled)

TOP