Note_Tech

All technological notes.


Project maintained by simonangel-fong Hosted on GitHub Pages — Theme by mattgraham

Preprocessing - Categorical Data

Back


Categorical Data

import pandas as pd

# Load the raw car records from the CSV file into a DataFrame.
cars = pd.read_csv(filepath_or_buffer='data.csv')

# Preview the first five rows; the output below shows the columns
# Car, Model, Volume, Weight and CO2.
cars.head(n=5)
# 	Car	Model	Volume	Weight	CO2
# 0	Toyoty	Aygo	1000	790	99
# 1	Mitsubishi	Space Star	1200	1160	95
# 2	Skoda	Citigo	1000	929	95
# 3	Fiat	500	900	865	90
# 4	Mini	Cooper	1500	1140	105

One Hot Encoding


# One-hot encode the categorical 'Car' column: get_dummies produces one
# indicator column per distinct brand, named 'Car_<brand>'.
ohe_cars = pd.get_dummies(cars[['Car']])
# print(ohe_cars.to_string())

# Feature matrix: the numeric columns (Volume first, then Weight) followed by
# the brand indicator columns.
X = pd.concat([cars[['Volume', 'Weight']], ohe_cars], axis=1)
# print(X.to_string())

# Target: the CO2 column.
y = cars['CO2']
# print(y.to_string())
from sklearn import linear_model
regr = linear_model.LinearRegression()
regr.fit(X, y)

# predict the CO2 emission of a Volvo where the weight is 2300kg, and the volume is 1300cm3:
#
# The query row is built by column NAME rather than by position, because:
#   * X is ordered ['Volume', 'Weight', ...], so a positional row starting
#     [2300, 1300, ...] puts 2300 in the Volume slot and 1300 in the Weight
#     slot -- the opposite of what the sentence above asks for.
#   * a hand-written 0/1 vector depends on how many brands data.csv contains
#     and on their column order, and breaks silently when either changes.
#     NOTE(review): 'Car_VW' sorts before 'Car_Volvo', so the second-to-last
#     slot used previously may not have been Volvo -- confirm against
#     X.columns.
#   * passing a DataFrame with the training column names keeps the feature
#     names attached to the input the model was fitted with.
query_values = {'Volume': 1300, 'Weight': 2300, 'Car_Volvo': 1}

# Fail loudly if a requested feature (e.g. the brand) is not in the training
# data; reindex() below would otherwise drop it without any error.
unknown_features = sorted(set(query_values) - set(X.columns))
if unknown_features:
    raise KeyError(f"features not present in the training data: {unknown_features}")

# Align to the training columns; every brand indicator not named above is 0.
query = pd.DataFrame([query_values]).reindex(columns=X.columns, fill_value=0)

predictedCO2 = regr.predict(query)
# NOTE: the previously recorded output [122.45153299] belonged to the
# positional call (2300 in the Volume slot, 1300 in the Weight slot); expect a
# different value now that the inputs match the description.
print(predictedCO2)

Dummifying


TOP