Overcoming Overfitting Using Weight Regularization

Happy PinGu 2022. 2. 10. 00:15
First, overfitting is the phenomenon where a model becomes so focused on the data it is trained on that its performance drops when checked on other data. In other words, the loss is very low only on the training data and comparatively high on the validation (or test) data.

Applying weight regularization means adding a small extra value (a penalty term) to the output of the original loss function, intentionally inflating the loss on the training data. By mixing this kind of impurity(?) into the training loss, the model is kept from concentrating only on the training data, so it also does well on the validation (or test) data. This is how the overfitting phenomenon is prevented.
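To make the idea concrete, here is a rough sketch of what that penalty term looks like (this is not the Keras internals; the weight values, data_loss, and alpha below are made-up numbers purely for illustration):

import tensorflow as tf

weights = tf.constant([0.5, -1.2, 3.0])   # made-up weights
data_loss = 0.8                           # made-up loss computed on the training data
alpha = 1e-4                              # regularization strength

l1_penalty = alpha * tf.reduce_sum(tf.abs(weights))     # L1: sum of absolute values
l2_penalty = alpha * tf.reduce_sum(tf.square(weights))  # L2: sum of squares

total_loss = data_loss + l2_penalty   # the optimizer minimizes the data loss plus the penalty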



If the absolute values of the weights are added:  l1      tensorflow.keras.regularizers.l1(alpha)
If the squares of the weights are added:           l2      tensorflow.keras.regularizers.l2(alpha)
If the two are combined:                            l1_l2   tensorflow.keras.regularizers.l1_l2(l1=..., l2=...)

The alpha value controls how strong the regularization is, as in the short example below.
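For example, attaching a regularizer to a single layer looks like this (just a sketch; the layer size and alpha values are placeholders):

from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l1, l2, l1_l2

Dense(300, activation='relu', kernel_regularizer=l1(1e-5))                 # L1 only
Dense(300, activation='relu', kernel_regularizer=l2(1e-5))                 # L2 only
Dense(300, activation='relu', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4))  # both combined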

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPooling2D, \
                                                    Flatten, Dropout, Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.regularizers import l1, l2, l1_l2


def make_zero_to_one(images, labels) :
    
    # scale pixel values from 0~255 to 0~1 and cast both arrays to float32
    images = np.array(images/255., dtype = np.float32)
    labels = np.array(labels, dtype = np.float32)
    
    return images, labels


def ohe(labels) :
    
    labels = to_categorical(labels)
    
    return labels


def tr_val_test(train_images, train_labels, test_images, test_labels, val_rate) :
    
    # carve a validation set out of the training data; the test set is passed through unchanged
    tr_images, val_images, tr_labels, val_labels = \
                          train_test_split(train_images, train_labels, test_size = val_rate)
    
    return (tr_images, tr_labels), (val_images, val_labels), (test_images, test_labels)


def create_before_model(tr_images, verbose):
    
    input_size = tr_images.shape[1]

    input_tensor = Input(shape=(input_size, input_size, 3))

    x = Conv2D(filters=32, kernel_size=(3, 3), padding='same')(input_tensor)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters=32, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(filters=64, kernel_size=3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters=64, kernel_size=3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=2)(x)

    x = Conv2D(filters=128, kernel_size=3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters=128, kernel_size=3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Flatten(name='flatten')(x)
    x = Dropout(rate=0.5)(x)
    x = Dense(300, activation='relu', name='fc1')(x)
    x = Dropout(rate=0.3)(x)
    output = Dense(10, activation='softmax', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)
    
    if verbose == True :
        model.summary()

    return model
        
        
def create_after_model(tr_images, verbose):
    
    input_size = tr_images.shape[1]

    input_tensor = Input(shape=(input_size, input_size, 3))

    x = Conv2D(filters=32, kernel_size=(3, 3), padding='same', kernel_regularizer = l2(0.00001))(input_tensor)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters=32, kernel_size=(3, 3), padding='same', kernel_regularizer = l2(0.00001))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(filters=64, kernel_size=3, padding='same', kernel_regularizer = l1_l2(l1 = 1e-5, l2 = 1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters=64, kernel_size=3, padding='same', kernel_regularizer = l2(1e-5))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=2)(x)

    x = Conv2D(filters=128, kernel_size=3, padding='same', kernel_regularizer = l2(1e-5))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters=128, kernel_size=3, padding='same', kernel_regularizer = l2(1e-5))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Flatten()(x)
    x = Dropout(rate=0.5)(x)
    x = Dense(300, activation='relu', name='fc1', kernel_regularizer = l2(1e-5))(x)
    x = Dropout(rate=0.3)(x)
    output = Dense(10, activation='softmax', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)
    
    if verbose == True :
        model.summary()

    return model
        
        
def lets_compare_two(before, after) :
    
    fig, axs = plt.subplots(nrows = 1, ncols = 2, figsize = (22, 6))
    
    axs[0].plot(before.history["val_accuracy"], label = "before")
    axs[0].plot(after.history["val_accuracy"], label = "after")
    axs[0].set_title("val_accuracy")
    axs[0].set_xlabel("epochs")
    axs[0].set_ylabel("val_acc")
    axs[0].legend()
    
    axs[1].plot(before.history["val_loss"], label = "before")
    axs[1].plot(after.history["val_loss"], label = "after")
    axs[1].set_title("val_loss")
    axs[1].set_xlabel("epochs")
    axs[1].set_ylabel("val_loss")
    axs[1].legend()
    
    plt.show()
    
    
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

train_images, train_labels = make_zero_to_one(train_images, train_labels)
test_images, test_labels   = make_zero_to_one(test_images, test_labels)

train_labels = ohe(train_labels)
test_labels  = ohe(test_labels)

(tr_images, tr_labels), (val_images, val_labels), (test_images, test_labels) = \
         tr_val_test(train_images, train_labels, test_images, test_labels, val_rate = 0.15)
         
         
         
model_before = create_before_model(tr_images, verbose = True)

model_before.compile(optimizer = Adam(learning_rate = 0.001), loss = "categorical_crossentropy", metrics = ["accuracy"])

# cut the learning rate to 20% when val_loss plateaus for 5 epochs; stop training after 13 epochs without improvement
rlr = ReduceLROnPlateau(monitor = "val_loss", factor = 0.2, patience = 5, mode = "min", verbose = True)
ely = EarlyStopping(monitor = "val_loss", patience = 13, mode = "min", verbose = True)


result_before = model_before.fit(x = tr_images, y = tr_labels, batch_size = 32, epochs = 40, shuffle = True,
                                 validation_data = (val_images, val_labels), callbacks = [rlr, ely])
                                 
                                 
model_after = create_after_model(tr_images, verbose = True)
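To double-check that the regularizers were actually attached, the layers can be inspected like this (a quick sanity check using the standard kernel_regularizer attribute):

# print the regularizer config of every layer that has one attached
for layer in model_after.layers:
    reg = getattr(layer, 'kernel_regularizer', None)
    if reg is not None:
        print(layer.name, reg.get_config())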

model_after.compile(optimizer = Adam(learning_rate = 0.001), loss = "categorical_crossentropy", metrics = ["accuracy"])

rlr = ReduceLROnPlateau(monitor = "val_loss", factor = 0.2, patience = 5, mode = "min", verbose = True)
ely = EarlyStopping(monitor = "val_loss", patience = 13, mode = "min", verbose = True)


result_after = model_after.fit(x = tr_images, y = tr_labels, batch_size = 32, epochs = 40, shuffle = True,
                                 validation_data = (val_images, val_labels), callbacks = [rlr, ely])
                                 
                                 
fig, axs = plt.subplots(nrows = 1, ncols = 2, figsize = (22, 6))
    
axs[0].plot(result_before.history["accuracy"], label = "tr")
axs[0].plot(result_before.history["val_accuracy"], label = "val")
axs[0].set_title("before")
axs[0].set_xlabel("epochs")
axs[0].set_ylabel("acc")
axs[0].legend()
    
axs[1].plot(result_after.history["accuracy"], label = "tr")
axs[1].plot(result_after.history["val_accuracy"], label = "val")
axs[1].set_title("after")
axs[1].set_xlabel("epochs")
axs[1].set_ylabel("acc")
axs[1].legend()

plt.show()
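The lets_compare_two helper defined earlier can also be called to put the two validation curves on the same axes:

# direct before/after comparison of val_accuracy and val_loss
lets_compare_two(result_before, result_after)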
Did the overfitting actually improve..?
For now, just keep in mind that this technique exists.