데이터 분석_CNN_CIFAR10_MNIST, 시계열_RNN기본_LSTM

DATA_STUDY 2024. 1. 22. 19:05

CNN_ MNIST

라이브러리 import

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random as rd
import cv2, os

from sklearn.model_selection import train_test_split
from sklearn.metrics import *

from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras.backend import clear_session
from keras.optimizers import Adam
from keras.datasets import mnist, fashion_mnist

데이터 불러오기

# Load MNIST from the Keras datasets module: one (images, labels) pair for
# training and one held out for validation.
mnist_train, mnist_val = mnist.load_data()
x_train, y_train = mnist_train
x_val, y_val = mnist_val
x_train.shape, y_train.shape

라벨링

# Display names for the ten digit classes, '0' through '9'.
class_names = [str(digit) for digit in range(10)]

불러온 데이터 확인하기

# Index of the training sample to preview; change it to look at other digits.
n = 1

plt.figure()
digit_image = plt.imshow(x_train[n], cmap='binary')
# Attach the colour scale to the image that was just drawn.
plt.colorbar(digit_image)
plt.show()

데이터 준비

x_train.shape, x_val.shape

# Add the trailing channel axis the Conv2D input below declares (28, 28, 1).
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000,
# so the cell also works on a subset of the data.
x_train = x_train.reshape(-1, 28, 28, 1)
x_val = x_val.reshape(-1, 28, 28, 1)

# Scale pixel intensities from the 0-255 range down to 0-1.
x_train = x_train / 255.
x_val = x_val / 255.

CNN 모델

# Clear the Keras session before building a new model.
clear_session()

# Two convolution + max-pooling stages followed by a dense classifier head.
cnn_layers = [
    Conv2D(32, kernel_size=(3, 3), input_shape=(28, 28, 1), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
]
model = Sequential(cnn_layers)

# Model summary
model.summary()

# Compile: labels are passed as integers, hence the sparse categorical loss.
adam = Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss='sparse_categorical_crossentropy')

# Train, then keep only the per-epoch metrics dict for plotting.
training_run = model.fit(x_train, y_train, epochs=10, validation_split=0.2)
history = training_run.history

학습 그래프 그리기

# Training vs. validation loss per epoch.
plt.figure(figsize=(10, 6))
for history_key, legend_label in (('loss', 'train_err'), ('val_loss', 'val_err')):
    plt.plot(history[history_key], label=legend_label, marker=',')

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid()
plt.show()

예측 및 평가

# Class probabilities for the validation images, then the most probable class.
pred = model.predict(x_val)
pred_1 = np.argmax(pred, axis=1)

separator = '-' * 60
print(accuracy_score(y_val, pred_1))
print(separator)
print(confusion_matrix(y_val, pred_1))
print(separator)
print(classification_report(y_val, pred_1))

CNN_CIFAR10

라이브러리 import

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import random as rd
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow import keras

데이터 불러오기

# Load CIFAR-10 as a (train, test) pair of (images, labels) tuples.
cifar_train, cifar_test = keras.datasets.cifar10.load_data()
train_x, train_y = cifar_train
test_x, test_y = cifar_test
print(*(array.shape for array in (train_x, train_y, test_x, test_y)))

라벨링

# Map each integer class id (0-9) to its display name.
labels = dict(enumerate([
    'Airplane',
    'Automobile',
    'Bird',
    'Cat',
    'Deer',
    'Dog',
    'Frog',
    'Horse',
    'Ship',
    'Truck',
]))

print(labels)

이미지 랜덤 확인해보기

# Pick a random test image. The upper bound follows the dataset size instead
# of a hard-coded 10000, and the name no longer shadows the builtin `id`.
sample_idx = rd.randrange(len(test_x))

print(f'id = {sample_idx}')
# test_y rows are indexed with [0] to pull the scalar class id out of each row.
print(f'다음 그림은 {labels[test_y[sample_idx][0]]} 입니다.')
plt.imshow(test_x[sample_idx])
plt.show()

표준화 작업하기

전체 픽셀에 한꺼번에 Standardization 적용
train 데이터의 평균·표준편차를 기준으로 train과 test 모두에 적용해야 함

# Compute the statistics from the raw training set ONCE, before train_x is
# overwritten. Otherwise test_x would be scaled with the mean/std of the
# already-standardized train_x (about 0 and 1) and stay effectively unscaled.
train_mean = train_x.mean()
train_std = train_x.std()

# Apply the same train-derived statistics to both splits.
train_x = (train_x - train_mean) / train_std
test_x = (test_x - train_mean) / train_std
train_x.shape, test_x.shape

y: one-hot-encoding

import numpy as np
from tensorflow.keras.utils import to_categorical

# Number of distinct classes found in the training labels.
class_n = len(np.unique(train_y))
print( class_n )

# One-hot encode both label arrays using that class count.
train_y, test_y = (
    to_categorical(label_array, class_n) for label_array in (train_y, test_y)
)
train_y.shape

확인하기

# Display the feature and label array shapes side by side.
train_x.shape, train_y.shape

모델링하기

from tensorflow.keras.backend import clear_session
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.losses import categorical_crossentropy

## Functional API

def _conv_block(inputs, filters):
    """Apply one Conv-Conv-BatchNorm-MaxPool-Dropout stage.

    inputs  : tensor produced by the previous layer.
    filters : number of feature maps each of the two Conv2D layers produces.
    Returns the output tensor of the stage.
    """
    x = inputs
    for _ in range(2):
        x = Conv2D(filters=filters,       # number of feature maps to produce
                   kernel_size=(3, 3),    # filter height and width
                   strides=(1, 1),        # filter step size
                   padding='same',        # keep the incoming feature-map size
                   activation='relu'
                   )(x)
    x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2),       # pooling window height and width
                  strides=(2, 2)          # pooling step size
                  )(x)
    # Dropout: disable 25% of activations during training.
    return Dropout(0.25)(x)


# 1. Clear the session.
clear_session()

# 2. Chain the layers together.
# Input layer
il = Input(shape=(32, 32, 3))
# Two convolution stages: 32 feature maps, then 64.
hl = _conv_block(il, 32)
hl = _conv_block(hl, 64)

# Flatten the feature maps for the dense head.
hl = Flatten()(hl)
# Fully connected layer: 1024 nodes
hl = Dense(1024, activation='relu')(hl)
# BatchNormalization
hl = BatchNormalization()(hl)
# Dropout: disable 35% of activations during training.
hl = Dropout(0.35)(hl)
# Output layer: one softmax unit per class.
ol = Dense(10, activation='softmax')(hl)

# 3. Declare where the model starts and ends.
model2 = Model(il, ol)

# 4. Compile
model2.compile(optimizer='adam',
               loss=categorical_crossentropy,
               metrics=['accuracy']
               )
model2.summary()

얼리스탑핑 적용

from tensorflow.keras.callbacks import EarlyStopping

# Early-stopping configuration, spelled out as a dict and unpacked below.
early_stopping_options = {
    'monitor': 'val_loss',          # quantity the callback watches
    'min_delta': 0,                 # threshold: a change must exceed this to count as improvement
    'patience': 3,                  # epochs to keep going without improvement
    'verbose': 1,                   # report when training is stopped
    'restore_best_weights': True,   # roll back to the best weights after stopping
}
es = EarlyStopping(**early_stopping_options)

모델 학습하기

# The epoch count is deliberately huge; the early-stopping callback ends training.
# validation_split=0.2: Keras holds out the final 20% of the provided arrays
# as validation data (the split is made once, not re-drawn every epoch).
fit_options = dict(
    epochs=10000,
    verbose=1,
    validation_split=0.2,
    callbacks=[es],
)
model2.fit(train_x, train_y, **fit_options)

확인하기

# Undo the one-hot encoding: the index of the largest entry is the class id.
train_y = np.argmax(train_y, axis=1)
test_y = np.argmax(test_y, axis=1)

정확도 확인

# Class probabilities for both splits.
pred_train = model2.predict(train_x)
pred_test = model2.predict(test_x)

# Most probable class per sample.
single_pred_train = np.argmax(pred_train, axis=1)
single_pred_test = np.argmax(pred_test, axis=1)

# Fraction of samples whose predicted class matches the label.
logi_train_accuracy = accuracy_score(y_true=train_y, y_pred=single_pred_train)
logi_test_accuracy = accuracy_score(y_true=test_y, y_pred=single_pred_test)

print('CNN')
print(f'트레이닝 정확도 : {logi_train_accuracy*100:.2f}%')
print(f'테스트 정확도 : {logi_test_accuracy*100:.2f}%')

# Spot-check the model on one random test image.

# The bound follows the dataset size instead of a hard-coded 10000, and the
# name no longer shadows the builtin `id`.
sample_idx = rd.randrange(len(test_x))

print(f'id = {sample_idx}')
print(f'다음 그림은 {labels[test_y[sample_idx]]} 입니다.')
print(f'모델의 예측 : {labels[single_pred_test[sample_idx]]}')

# Per-class probability as a whole-number percentage (floored).
class_percents = np.floor(pred_test[sample_idx]*100).tolist()
prob_dict = {labels[idx]: percent for idx, percent in enumerate(class_percents)}

print('모델의 카테고리별 확률 : ')
print(prob_dict)

if test_y[sample_idx] == single_pred_test[sample_idx] :
    print('정답입니다')
else :
    print('틀렸어요')

# test_x no longer holds 0-1 pixel values at this point, and imshow clips
# float RGB data outside [0, 1]. Stretch this one image to [0, 1] so it is
# actually visible (contrast is per image, not the original colours).
image = test_x[sample_idx].reshape([32,32,-1])
value_span = image.max() - image.min()
if value_span > 0:
    image = (image - image.min()) / value_span
plt.imshow(image)
plt.show()

RNN 기본

라이브러리 import

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM, GRU
from keras.backend import clear_session
from tensorflow.keras.optimizers import Adam

# 시계열 전처리 : 2차원-> 3차원으로 변환

def temporalize(x, y, timesteps):
    """Cut a 2-D feature table into overlapping windows for a recurrent model.

    Window i holds rows i .. i + timesteps - 1 of `x`; its target is the `y`
    value aligned with the window's last row.

    Parameters
    ----------
    x : 2-D array-like (rows, features); NumPy array or DataFrame.
    y : 1-D array-like with one target per row of `x`; NumPy array, list or
        Series.
    timesteps : number of consecutive rows per window.

    Returns
    -------
    (windows, targets) : `windows` has shape
        (len(x) - timesteps + 1, timesteps, n_features) and `targets` has one
        entry per window. Both are empty when `x` has fewer rows than
        `timesteps`.
    """
    # Convert up front so indexing below is always positional. A pandas
    # Series would otherwise be indexed by label and fail (or pick the wrong
    # rows) whenever its index is not 0..n-1.
    x = np.asarray(x)
    y = np.asarray(y)
    nfeature = x.shape[1]
    n_windows = len(x) - timesteps + 1  # range() below is empty if this is <= 0

    output_x = [x[start:start + timesteps] for start in range(n_windows)]
    output_y = [y[start + timesteps - 1] for start in range(n_windows)]
    return np.array(output_x).reshape(-1, timesteps, nfeature), np.array(output_y)

데이터 준비

# NOTE: the CSV path and the feature column name are blank placeholders here;
# fill them in before running.
data = pd.read_csv('')
x = data[['']]   # double brackets keep the features as a 2-D DataFrame
y = data['y']    # target column

데이터 스케일링

# Fit the min-max scaler on the features, then replace x with the scaled array.
scaler = MinMaxScaler()
x = scaler.fit(x).transform(x)
x.shape, y.shape

3차원 구조 만들기

# Build the windowed arrays; timesteps is the window length to analyse.
x2, y2 = temporalize(x, y, timesteps=4)
x2.shape, y2.shape

데이터 분할

# shuffle=False keeps the rows in their original order, so the 53 validation
# samples are taken from the end of the series.
x_train, x_val, y_train, y_val = train_test_split(
    x2, y2,
    shuffle=False,
    test_size=53,
)

# Check the resulting shapes.
x_train.shape, y_train.shape,x_val.shape, y_val.shape

RNN 모델링하기

- 입력구조 확인하기

- 분석 단위 2차원( timesteps, nfeatures)

x_train.shape
# e.g. a shape of (208, 4, 1) means input_shape=(4, 1)

# Axis 1 is the window length and axis 2 the feature count.
timestep, nfeatures = x_train.shape[1:3]

1) 모델 RNN의 경우

# Clear the Keras session before building a new model.
clear_session()
# SimpleRNN: the first layer returns the full sequence so the second
# recurrent layer can consume it.
model = Sequential([ SimpleRNN(8, input_shape=(timestep, nfeatures), return_sequences=True),
                    SimpleRNN(4),
                    Dense(1)])
model.summary()
# Compile. `learning_rate` replaces the deprecated `lr` keyword, which current
# Keras releases reject; this also matches the Adam call in the MNIST section.
model.compile(optimizer = Adam(learning_rate = 0.01), loss = 'mse' )
# Train and keep the per-epoch metrics dict.
history = model.fit(x_train, y_train, epochs = 100, validation_split = .2).history

2) 모델 LSTM의 경우

from keras.layers import Flatten
# Clear the Keras session before building a new model.
clear_session()

# LSTM: both recurrent layers return full sequences, which Flatten then
# collapses for the dense head.
modelL = Sequential([LSTM(16, input_shape = (timestep, nfeatures), return_sequences=True),
                    LSTM(8, return_sequences = True),
                    Flatten(),
                    Dense(16, activation = 'relu'),
                    Dense(1)])
# Model summary
modelL.summary()

# Compile. `learning_rate` replaces the deprecated `lr` keyword, which current
# Keras releases reject.
modelL.compile(optimizer = Adam(learning_rate = 0.01), loss = 'mse' )
# Train. This overwrites `history` with the LSTM's per-epoch metrics.
history= modelL.fit(x_train, y_train, epochs = 100, validation_split = .2).history

학습곡선

# Training vs. validation loss per epoch for the most recently trained model.
plt.figure(figsize=(10, 6))
for history_key, legend_label in (('loss', 'train_err'), ('val_loss', 'val_err')):
    plt.plot(history[history_key], label=legend_label, marker=',')

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid()
plt.show()

예측 & 평가

# Predict on the validation windows.
# NOTE(review): `model` is the SimpleRNN network; the LSTM is `modelL`, and
# `history` was last set by the LSTM fit. Confirm which model is meant here.
pred = model.predict(x_val)

# Evaluate: mean absolute error, then mean absolute percentage error.
for metric in (mean_absolute_error, mean_absolute_percentage_error):
    print(metric(y_val, pred))

시각화

# Overlay the actual validation targets and the model's predictions.
plt.figure(figsize=(10, 6))
for series, legend_label in ((y_val, 'actual'), (pred, 'predicted')):
    plt.plot(series, label=legend_label)
plt.legend()
plt.grid()
plt.show()

저작자표시 (새창열림)

'DATA_STUDY' 카테고리의 다른 글

파이썬_워드클라우드 2 (0)	2024.01.22
파이썬_파일읽고쓰기_워드클라우드 (1)	2024.01.22
데이터 분석_파이토치 : Pytorch3 (0)	2024.01.22
데이터 분석_파이토치 : Pytorch2 (0)	2024.01.22
데이터 분석_파이토치 : Pytorch1 (0)	2024.01.19

ABOUT ME

Data_with_U Data_with_U

CNN_ MNIST

CNN_CIFAR10

RNN 기본

'DATA_STUDY' 카테고리의 다른 글

티스토리툴바

ABOUT ME

CNN_ MNIST

CNN_CIFAR10

RNN 기본

'DATA_STUDY' 카테고리의 다른 글

관련글 관련글 더보기

티스토리툴바