
AI Day 6 (2023-05-15) AI Basics _ Machine Learning - Tree Models: DecisionTreeClassifier, DecisionTreeRegressor

by prometedor 2023. 5. 15.

Decision Tree

https://tensorflowkorea.files.wordpress.com/2017/06/2-22.png

 

ㄴ A model widely used for both classification and regression problems

ㄴ Fundamentally, a decision tree learns by chaining yes/no questions until it reaches a decision

 

 

 

https://tensorflowkorea.files.wordpress.com/2017/06/2-24.png?w=1250
https://tensorflowkorea.files.wordpress.com/2017/06/2-26.png?w=1250

 

ㄴ In scikit-learn, decision trees are implemented as DecisionTreeRegressor and DecisionTreeClassifier (see the sketch below for what the learned yes/no rules look like)
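
For a quick look at the yes/no splitting described above, the sketch below fits a shallow tree on the iris data and prints the learned rules with export_text. This is only a minimal sketch: max_depth=2 and random_state=0 are arbitrary choices made here for a short, reproducible printout and are not part of the scripts that follow.

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

iris = load_iris()
tree = DecisionTreeClassifier(max_depth=2, random_state=0)  # shallow tree so the rules stay readable
tree.fit(iris.data, iris.target)

# each printed line is one yes/no question of the form "feature <= threshold"
print(export_text(tree, feature_names=list(iris.feature_names)))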

 

 

DecisionTreeClassifier

ml04_tree_iris.py

# 1. iris
import numpy as np
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import MaxAbsScaler, RobustScaler

# 1. Data
datasets = load_iris()  # multiclass classification
x = datasets['data']
y = datasets.target

# print(x.shape, y.shape)     # (150, 4) (150,)
# print('y labels :', np.unique(y))    # y labels : [0 1 2]
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)

# Apply scaler
scaler = MinMaxScaler()
scaler.fit(x_train)                 # fit the scaler on the training set only
x_train = scaler.transform(x_train) # the training set gets both fit and transform
x_test = scaler.transform(x_test)   # the test set is only transformed (never fitted)


# 2. Model
model = DecisionTreeClassifier()

# 3. Training
model.fit(x_train, y_train)


# 4. Evaluation, prediction
result = model.score(x_test, y_test)
print('acc : ', result)


# SVC() acc :  0.9777777777777777
# LinearSVC() acc :  0.9777777777777777
# my tf keras model acc :  1.0
# MinMaxScaler() acc :  0.9777777777777777
# ===============================================
# DecisionTreeClassifier() acc :  0.9555555555555556
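
Since the tree above is already fitted, its feature_importances_ attribute shows which of the four iris measurements drive the splits. The lines below are a small sketch that assumes they run right after ml04_tree_iris.py, so model and datasets are still in scope.

# sketch: per-feature importance of the fitted tree
# (assumes `model` and `datasets` from the script above)
for name, importance in zip(datasets.feature_names, model.feature_importances_):
    print(f'{name}: {importance:.3f}')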

 

ml04_tree_cancer.py

# 2. cancer
import numpy as np
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import MaxAbsScaler, RobustScaler

import tensorflow as tf
tf.random.set_seed(77)  # fix the random seed (weight initialization) for the Keras comparison run

# 1. Data
datasets = load_breast_cancer()  # binary classification
x = datasets['data']
y = datasets.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)

# Apply scaler
scaler = StandardScaler()
scaler.fit(x_train)                 # fit the scaler on the training set only
x_train = scaler.transform(x_train) # the training set gets both fit and transform
x_test = scaler.transform(x_test)   # the test set is only transformed (never fitted)


# 2. Model
model = DecisionTreeClassifier()


# 3. Training
model.fit(x_train, y_train)


# 4. Evaluation, prediction
result = model.score(x_test, y_test)
print('acc : ', result)

# SVC() acc :  0.9064327485380117
# LinearSVC() acc :  0.9122807017543859
# my tf keras model acc :  0.9298245614035088
# StandardScaler() acc :  0.9766081871345029
# ===============================================
# DecisionTreeClassifier() acc :  0.9415204678362573
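
An unpruned DecisionTreeClassifier typically memorizes the training set (train accuracy near 1.0), so comparing train and test scores while capping max_depth is a common check. The sketch below reuses the train/test split from ml04_tree_cancer.py; max_depth=4 and random_state=0 are arbitrary example values, not tuned settings.

from sklearn.tree import DecisionTreeClassifier

# sketch: limit tree depth and compare train vs. test accuracy
# (assumes x_train, x_test, y_train, y_test from the script above)
pruned = DecisionTreeClassifier(max_depth=4, random_state=0)
pruned.fit(x_train, y_train)
print('train acc :', pruned.score(x_train, y_train))
print('test acc  :', pruned.score(x_test, y_test))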

 

ml04_tree_wine.py

# 3. wine
import numpy as np
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import MaxAbsScaler, RobustScaler

import tensorflow as tf
tf.random.set_seed(77)  # fix the random seed (weight initialization) for the Keras comparison run

# 1. Data
datasets = load_wine()  # multiclass classification
x = datasets['data']
y = datasets.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)

# Apply scaler
scaler = MinMaxScaler()
scaler.fit(x_train)                 # fit the scaler on the training set only
x_train = scaler.transform(x_train) # the training set gets both fit and transform
x_test = scaler.transform(x_test)   # the test set is only transformed (never fitted)


# 2. Model
model = DecisionTreeClassifier()


# 3. Training
model.fit(x_train, y_train)


# 4. Evaluation, prediction
result = model.score(x_test, y_test)
print('acc : ', result)

# SVC() acc :  0.5555555555555556
# LinearSVC() acc :  0.7222222222222222
# my tf keras model acc :  0.9259259104728699
# MinMaxScaler() acc :  0.9814814814814815
# =================================================
# DecisionTreeClassifier() acc :  0.8518518518518519
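
Because a single 70/30 split can make a tree's accuracy swing quite a bit (as the wine results above suggest), cross-validation gives a steadier estimate. The sketch below assumes the scaled x_train and y_train from ml04_tree_wine.py; cv=5 and random_state=0 are arbitrary choices.

from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# sketch: 5-fold cross-validation on the scaled training data
# (assumes x_train and y_train from the script above)
scores = cross_val_score(DecisionTreeClassifier(random_state=0), x_train, y_train, cv=5)
print('fold accuracies :', scores)
print('mean accuracy   :', scores.mean())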

 

 

DecisionTreeRegressor

ml04_tree_california.py

# 4. california (SVR, LinearSVR)
import numpy as np
from sklearn.svm import SVR, LinearSVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score  # regression metric (accuracy is not meaningful here)
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import MaxAbsScaler, RobustScaler

import tensorflow as tf
tf.random.set_seed(77)  # fix the random seed (weight initialization) for the Keras comparison run

# 1. Data
datasets = fetch_california_housing()
x = datasets.data
y = datasets.target


x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)

# Apply scaler
# scaler = MinMaxScaler()
# scaler = StandardScaler()
# scaler = MaxAbsScaler()
scaler = RobustScaler()
scaler.fit(x_train)                 # fit the scaler on the training set only
x_train = scaler.transform(x_train) # the training set gets both fit and transform
x_test = scaler.transform(x_test)   # the test set is only transformed (never fitted)

# 2. Model
model = DecisionTreeRegressor()


# 3. Training
model.fit(x_train, y_train)


# 4. Evaluation, prediction
result = model.score(x_test, y_test)
print('r2 : ', result)

# SVR() r2 :  -0.01663695941103427
# LinearSVR() r2 :  0.06830124384888547
# my tf keras model r2 score :  0.5346585367965508
# after applying RobustScaler, r2 :  0.6873119065345796
# ====================================================
# DecisionTreeRegressor() r2 :  0.612701922946608
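
For DecisionTreeRegressor, model.score returns R², but an error measured in the target's own units is often easier to interpret. The sketch below assumes model, x_test, and y_test from ml04_tree_california.py; the target of fetch_california_housing is the median house value in units of 100,000 USD.

from sklearn.metrics import r2_score, mean_absolute_error

# sketch: report R^2 plus the mean absolute error
# (assumes `model`, x_test, y_test from the script above)
y_pred = model.predict(x_test)
print('r2  :', r2_score(y_test, y_pred))
print('mae :', mean_absolute_error(y_test, y_pred))  # in units of the target: 100,000 USD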