본문 바로가기
네이버클라우드/AI

AI 6일차 (2023-05-15) 인공지능 기초 _머신러닝 - All_Estimator

by prometedor 2023. 5. 15.

All_Estimator

ㄴ sklearn.utils.all_estimators() 는 scikit-learn에 구현된 모든 추정기(estimator)를 (이름, 클래스) 튜플의 리스트로 반환 (객체가 아닌 클래스이므로 사용하려면 직접 인스턴스를 생성해야 함)

ㄴ scikit-learn은 다양한 종류의 추정기를 제공

ㄴ 분류 모델을 위한 DecisionTreeClassifier, RandomForestClassifier, LogisticRegression 등이 있음

ㄴ 회귀 모델을 위한 LinearRegression, RandomForestRegressor, GradientBoostingRegressor 등이 있음
ㄴ 이러한 다양한 추정기를 사용하여 데이터 분석 및 예측 모델링을 수행할 수 있음

 

 

ml06_all_estimator01.py

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import all_estimators
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings('ignore')   # 경고 무시

# 1. Data: load the iris dataset and separate features from labels.
datasets = load_iris()
x, y = datasets.data, datasets.target

# Keep 70% of the samples for training; the fixed seed makes the shuffled
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    shuffle=True,
    random_state=42,
)

# Scaler: learn the min-max range from the training split only, then apply
# the same transform to both splits.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


# 2. Model: collect every classifier exposed by scikit-learn.
# all_estimators() yields (name, estimator class) pairs, not instances.
allAlgorithms = all_estimators(type_filter='classifier')
print('allAlgorithms : ', allAlgorithms)
# The author observed 41 entries; the count depends on the installed
# scikit-learn version.
print('몇 개? : ', len(allAlgorithms))

# 3. Evaluation: fit each classifier with its default constructor arguments
# and print its accuracy on the test split.
for name, algorithm in allAlgorithms:
    # An estimator that fails to construct, fit or predict is reported and
    # skipped so the sweep keeps going. Exception (not a bare except) is
    # caught so that KeyboardInterrupt and SystemExit still stop the script.
    try:
        model = algorithm()
        model.fit(x_train, y_train)
        y_predict = model.predict(x_test)
        acc = accuracy_score(y_test, y_predict)
        print(name, '의 정답률 : ', acc)
    except Exception:
        print(name, '출력 안 됨')

 

ml06_all_estimator04.py

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import all_estimators
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings('ignore')   # 경고 무시

# 1. Data: load the breast-cancer dataset and separate features from labels.
datasets = load_breast_cancer()
x, y = datasets.data, datasets.target

# Keep 70% of the samples for training; the fixed seed makes the shuffled
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    shuffle=True,
    random_state=42,
)

# Scaler: learn the min-max range from the training split only, then apply
# the same transform to both splits.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


# 2. Model: collect every classifier exposed by scikit-learn.
# all_estimators() yields (name, estimator class) pairs, not instances.
allAlgorithms = all_estimators(type_filter='classifier')
print('allAlgorithms : ', allAlgorithms)
# The author observed 41 entries; the count depends on the installed
# scikit-learn version.
print('몇 개? : ', len(allAlgorithms))

# 3. Evaluation: fit each classifier with its default constructor arguments
# and print its accuracy on the test split.
for name, algorithm in allAlgorithms:
    # An estimator that fails to construct, fit or predict is reported and
    # skipped so the sweep keeps going. Exception (not a bare except) is
    # caught so that KeyboardInterrupt and SystemExit still stop the script.
    try:
        model = algorithm()
        model.fit(x_train, y_train)
        y_predict = model.predict(x_test)
        acc = accuracy_score(y_test, y_predict)
        print(name, '의 정답률 : ', acc)
    except Exception:
        print(name, '출력 안 됨')

 

 

ml06_all_estimator03.py

import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import all_estimators
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings('ignore')   # 경고 무시

# 1. Data: load the wine dataset and separate features from labels.
datasets = load_wine()
x, y = datasets.data, datasets.target

# Keep 70% of the samples for training; the fixed seed makes the shuffled
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    shuffle=True,
    random_state=42,
)

# Scaler: learn the min-max range from the training split only, then apply
# the same transform to both splits.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


# 2. Model: collect every classifier exposed by scikit-learn.
# all_estimators() yields (name, estimator class) pairs, not instances.
allAlgorithms = all_estimators(type_filter='classifier')
print('allAlgorithms : ', allAlgorithms)
# The author observed 41 entries; the count depends on the installed
# scikit-learn version.
print('몇 개? : ', len(allAlgorithms))

# 3. Evaluation: fit each classifier with its default constructor arguments
# and print its accuracy on the test split.
for name, algorithm in allAlgorithms:
    # An estimator that fails to construct, fit or predict is reported and
    # skipped so the sweep keeps going. Exception (not a bare except) is
    # caught so that KeyboardInterrupt and SystemExit still stop the script.
    try:
        model = algorithm()
        model.fit(x_train, y_train)
        y_predict = model.predict(x_test)
        acc = accuracy_score(y_test, y_predict)
        print(name, '의 정답률 : ', acc)
    except Exception:
        print(name, '출력 안 됨')

 

ml06_all_estimator02.py

import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.utils import all_estimators
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings('ignore')

# 1. Data: fetch the California housing dataset and separate features from
# the regression target.
datasets = fetch_california_housing()
x, y = datasets.data, datasets.target

# Keep 70% of the samples for training; the fixed seed makes the shuffled
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    shuffle=True,
    random_state=42,
)

# Learn the min-max range from the training split only, then apply the same
# transform to both splits.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# 2. Model: collect every regressor exposed by scikit-learn.
# all_estimators() yields (name, estimator class) pairs, not instances.
allalgorithms = all_estimators(type_filter='regressor')
# The author observed 55 entries; the count depends on the installed
# scikit-learn version.
print(len(allalgorithms))

# 3. Evaluation: fit each regressor with its default constructor arguments
# and print its R2 score on the test split.
# NOTE: the printed label reads "accuracy" (정답률) but the value is R2.
for name, algorithm in allalgorithms:
    # An estimator that fails to construct, fit or predict is reported and
    # skipped so the sweep keeps going. Exception (not a bare except) is
    # caught so that KeyboardInterrupt and SystemExit still stop the script,
    # which lets a slow fit be aborted with Ctrl-C.
    try:
        model = algorithm()
        model.fit(x_train, y_train)

        y_predict = model.predict(x_test)
        r2 = r2_score(y_test, y_predict)
        print(name, "의 정답률 : ", r2)
    except Exception:
        print(name, "출력 안 됨")


# Best model in the author's run:
# HistGradientBoostingRegressor, R2 = 0.83795759614234