UOMOP

[Music Genre Classification using LGBMClassifier (##Final##)] 본문

Project/Music Plagiarism Project

[Music Genre Classification using LGBMClassifier (##Final##)]

Happy PinGu 2022. 11. 3. 15:04
import numpy as np
import pandas as pd
import librosa
import joblib 
import matplotlib.pyplot as plt
import IPython.display as ipd

from xgboost import plot_importance
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier
def _feature_names():
    """Return the 57 feature column names in the order used by this file.

    Order: mean/var pairs for eight frame-level descriptors, then ``tempo``,
    then interleaved ``mfccN_mean, mfccN_var`` for N = 1..20.
    """
    descriptors = ['chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth',
                   'rolloff', 'zero_crossing_rate', 'harmony', 'perceptr']
    names = ["{}_{}".format(name, stat) for name in descriptors for stat in ("mean", "var")]
    names.append('tempo')
    names.extend("mfcc{}_{}".format(i, stat) for i in range(1, 21) for stat in ("mean", "var"))
    return names


def _extract_features(song, sr):
    """Compute the 57 summary features of ``song``, ordered like ``_feature_names()``.

    The audio is passed by keyword (``y=``) because recent librosa releases
    reject it as a positional argument.
    """
    chroma = librosa.feature.chroma_stft(y=song, sr=sr, n_fft=2048, hop_length=512)  # chroma features
    rms = librosa.feature.rms(y=song, frame_length=512, hop_length=512)              # RMS energy
    centroid = librosa.feature.spectral_centroid(y=song, sr=sr)                      # spectral centroid
    bandwidth = librosa.feature.spectral_bandwidth(y=song, sr=sr)                    # spectral bandwidth
    rolloff = librosa.feature.spectral_rolloff(y=song, sr=sr)                        # spectral rolloff
    zcr = librosa.feature.zero_crossing_rate(y=song, hop_length=512)                 # zero-crossing rate
    harmonic, percussive = librosa.effects.hpss(y=song)                              # harmonic / percussive parts
    tempo, _ = librosa.beat.beat_track(y=song, sr=sr)                                # tempo
    mfcc = librosa.feature.mfcc(y=song, sr=sr, n_mfcc=20)                            # first 20 MFCCs

    frame_level = (chroma, rms, centroid, bandwidth, rolloff, zcr, harmonic, percussive)
    summary = [stat(series) for series in frame_level for stat in (np.mean, np.var)]

    # Interleave per-coefficient mean and variance (m1, v1, m2, v2, ...) so the
    # values line up with the mfccN_mean / mfccN_var column names. Stacking all
    # means followed by all variances would mislabel every MFCC column.
    mfcc_stats = np.column_stack([mfcc.mean(axis=1), mfcc.var(axis=1)]).ravel()

    return np.hstack([summary, tempo, mfcc_stats])


def check_genre(song, sr, model):
    """Predict and print the genre of an audio signal.

    Parameters
    ----------
    song : audio time series, as returned by ``librosa.load``.
    sr : sampling rate of ``song``.
    model : fitted classifier exposing ``predict``; it must have been trained
        on the 53 scaled feature columns produced here.

    Returns
    -------
    str
        The predicted genre name (also printed).

    Notes
    -----
    Reads ``features_3_sec.csv`` and ``features_30_sec.csv`` on every call to
    rebuild the feature scaler.
    """
    dropped = ["mfcc13_var", "mfcc17_var", "mfcc14_var", "mfcc11_var"]
    genres = ["blues", "classical", "country", "disco", "hiphop",
              "jazz", "metal", "pop", "reggae", "rock"]

    col_names = _feature_names()
    col_names_drop = [name for name in col_names if name not in dropped]

    data = pd.concat([pd.read_csv("features_3_sec.csv"), pd.read_csv("features_30_sec.csv")])
    X_droped = data.drop(["label", "filename", "length"] + dropped, axis=1)

    features = _extract_features(song, sr).reshape(1, -1)
    input_df = pd.DataFrame(features, columns=col_names).drop(dropped, axis=1)
    # Align the query row to the reference column order by name.
    input_df = input_df[X_droped.columns]

    # Fit the scaler on the reference data only, then transform the query row.
    # Including the query row in the fit would shift the mean/variance away
    # from the statistics the model's training features were scaled with.
    ss = StandardScaler()
    ss.fit(np.array(X_droped, dtype=float))
    input_scaled = ss.transform(np.array(input_df, dtype=float))

    input_df = pd.DataFrame(input_scaled, columns=col_names_drop)

    prediction = model.predict(input_df)

    print("\n=====================")

    print("Predicted Label : {}".format(prediction))

    # Assumes the label encoder used for training assigned 0..9 in this order.
    # Any label outside 0..8 maps to "rock".
    label = int(np.ravel(prediction)[0])
    answer = genres[label] if 0 <= label < len(genres) else "rock"

    print("This is {}!!".format(answer))
    print("=====================")

    return answer
# ---------------------------------------------------------------------------
# Training script: load the feature CSVs, scale, train an LGBMClassifier,
# report test accuracy, and round-trip the model through joblib.
# ---------------------------------------------------------------------------
from lightgbm import log_evaluation  # callback used to silence per-iteration eval output

data_3sec = pd.read_csv("features_3_sec.csv")
data_30sec = pd.read_csv("features_30_sec.csv")

data = pd.concat([data_3sec, data_30sec])

X = data.drop("label", axis = 1)
# Select the target by name, matching how it is dropped from X above.
y = data["label"]

cvt = preprocessing.LabelEncoder()
y_encoded = cvt.fit_transform(y)

dropped_columns = ["mfcc13_var", "mfcc17_var", "mfcc14_var", "mfcc11_var"]
X_droped = X.drop(["filename", "length"] + dropped_columns, axis = 1, inplace = False)

# NOTE(review): the scaler is fit on all rows before the train/test split, so
# test rows influence the scaling statistics — confirm this is intended.
ss = StandardScaler()
X_scaled = ss.fit_transform(np.array(X_droped.iloc[:, :], dtype = float))

# Column names for the 53 retained features: mean/var pairs for eight
# frame-level descriptors, tempo, then interleaved MFCC mean/var, minus the
# four dropped MFCC variance columns.
descriptor_names = ['chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth',
                    'rolloff', 'zero_crossing_rate', 'harmony', 'perceptr']
feature_names = ["{}_{}".format(name, stat) for name in descriptor_names for stat in ("mean", "var")]
feature_names.append('tempo')
feature_names.extend("mfcc{}_{}".format(i, stat) for i in range(1, 21) for stat in ("mean", "var"))
feature_names = [name for name in feature_names if name not in dropped_columns]

X_df = pd.DataFrame(X_scaled, columns = feature_names)

y_df = pd.DataFrame(y_encoded, columns = ['target'])

X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size = 0.2, random_state = 156, shuffle = True)

evals = [(X_test, y_test)]

print("X_train.shape : {}".format(X_train.shape))
print("y_train.shape : {}".format(y_train.shape))

print("X_test.shape  : {}".format(X_test.shape))
print("y_test.shape  : {}".format(y_test.shape))

# Only LightGBM parameters are passed. The XGBoost-style `gamma`, `booster`
# and `scoring` arguments are not LightGBM parameters and were dropped.
model = LGBMClassifier(learning_rate = 0.06, n_estimators = 781, max_depth = 11, min_child_weight = 1,
                       subsample = 1, colsample_bytree = 1, min_data_in_leaf = 28, num_leaves = 19,
                       importance_type = 'gain', n_jobs = -1, random_state = 777)

# `fit(verbose=...)` is not accepted by recent LightGBM releases; a
# log_evaluation callback with period 0 suppresses evaluation logging instead.
model.fit(X_train, y_train, eval_set = evals, eval_metric = "logloss",
          callbacks = [log_evaluation(period = 0)])

# predict() already returns integer class labels, so no rounding is needed.
pred = model.predict(X_test)
predictions = pred
accuracy = accuracy_score(y_test, predictions)
print("===================")
print("Accuracy : {:.2f} %".format(accuracy * 100))
print("===================")

joblib.dump(model, 'my_model.pkl')

# Reload the saved model and evaluate it as-is. Refitting here would discard
# the loaded state and make this check independent of the pickle file.
model_called = joblib.load('my_model.pkl')

pred = model_called.predict(X_test)
predictions = pred
accuracy = accuracy_score(y_test, predictions)
print("===================")
print("Accuracy : {:.2f} %".format(accuracy * 100))
print("===================")

# Load the same clip at several sampling rates; `sr` ends up as the last one (4000).
champion_22050, sr = librosa.load('1-1.wav', sr = 22050)
champion_16000, sr = librosa.load('1-1.wav', sr = 16000)
champion_8000,  sr = librosa.load('1-1.wav', sr = 8000)
champion_4000,  sr = librosa.load('1-1.wav', sr = 4000)

Sampling Rate를 임의로 낮추면 예측 결과가 달라지는 것을 확인했다.
학습 데이터의 Feature를 추출할 때 SR을 22050으로 사용했기 때문이다.

 

 

 

 

 

Comments