Music Genre Classification using XGBoost

Happy PinGu 2022. 10. 26. 01:24
pip install joblib


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from xgboost import XGBClassifier
from xgboost import plot_importance
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import joblib

data = pd.read_csv("features_3_sec.csv")   # one row of audio features per 3-second clip; "label" holds the genre

X = data.drop("label", axis = 1)   # all feature columns (filename / length still included here)
Y = data.iloc[:, -1]               # genre label column
pd.set_option('display.max_columns', None)

cvt = preprocessing.LabelEncoder()   # map genre strings to integer codes
Y_encoded = cvt.fit_transform(Y)

X_dropped = X.drop(["filename", "length"], axis = 1, inplace = False)   # keep only the numeric audio features
X_dropped.head()

ss = StandardScaler()
X_scaled = ss.fit_transform(np.array(X_dropped, dtype = float))   # standardize each feature to zero mean / unit variance

X_df = pd.DataFrame(X_scaled, columns = X_dropped.columns)   # same 57 feature columns (chroma, rms, spectral stats, tempo, 20 MFCC means/vars), now scaled

Y_df = pd.DataFrame(Y_encoded, columns = ['target'])


X_train, X_test, y_train, y_test = train_test_split(X_df, Y_df, test_size = 0.25, random_state = 156)

model = XGBClassifier(booster = 'gbtree', objective = 'multi:softprob',
                      learning_rate = 0.27, n_estimators = 250, max_depth = 6,
                      min_child_weight = 1, gamma = 0, subsample = 1, colsample_bytree = 1,
                      reg_alpha = 0, reg_lambda = 1, random_state = 0, n_jobs = 1, verbosity = 1)

# Early stopping monitors multi-class log loss on the held-out split.
# (In xgboost >= 2.0 these arguments move from fit() to the XGBClassifier constructor.)
model.fit(X_train, y_train, early_stopping_rounds = 200, eval_metric = 'mlogloss', eval_set = [(X_test, y_test)])
pred = model.predict(X_test)                      # predicted class indices
pred_proba = model.predict_proba(X_test)[:, 1]    # probability of class index 1 only
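pred holds integer class indices; as a quick check (not part of the original post), they can be mapped back to genre names with the LabelEncoder fitted earlier:

# Decode the first few predictions back to genre strings.
print(cvt.inverse_transform(pred[:10]))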

fig, ax = plt.subplots(figsize = (10, 12))
plot_importance(model, ax = ax)   # split-count ("weight") feature importance
plt.show()

predictions = [round(value) for value in pred]
accuracy = accuracy_score(y_test, predictions) 
print("\n\n\n===========================")
print("Accuracy: %.2f%%" % (accuracy * 100.0))
print("===========================\n")
Let's save the trained model to a .pkl file with joblib and load it back to check that it behaves the same.
filename = 'xgboost_model.pkl'
joblib.dump(model, filename)

model_called = joblib.load(filename)   # reload the pickled model

pred = model_called.predict(X_test)
pred_proba = model_called.predict_proba(X_test)[:,1]

fig, ax = plt.subplots(figsize = (10, 12))
plot_importance(model_called, ax = ax)   # importances from the reloaded model match the original
plt.show()

predictions = [round(value) for value in pred]
accuracy = accuracy_score(y_test, predictions) 
print("\n\n\n===========================")
print("Accuracy: %.2f%%" % (accuracy * 100.0))
print("===========================\n")
