UOMOP
[Librosa Tempo Problem] 본문
librosa에서 제공하는 beat_track 등을 이용해서 tempo(bpm)을 구하면 정확성이 많이 떨어진다.
직접 음원의 Feature들을 추출해보고 tempo(bpm)에 영향을 많이 주는 Feature를 찾아보자.
# ========================= 라이브러리 호출 =========================
import numpy as np
import pandas as pd
import librosa
import joblib
import matplotlib.pyplot as plt
import IPython.display as ipd
import seaborn as sns
from sklearn import metrics
from xgboost import plot_importance
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier
import librosa.feature
import librosa.display
def get_features(y, sr):
    """Extract a 57-column audio feature row from one audio signal.

    Features: chroma mean/var, RMS mean/var, spectral centroid /
    bandwidth / roll-off mean+var, zero-crossing-rate mean/var,
    harmonic and percussive component mean/var, estimated tempo (bpm),
    and the mean/var of 20 MFCCs.

    Parameters
    ----------
    y : np.ndarray
        Audio time series (as returned by ``librosa.load``).
    sr : int
        Sampling rate of ``y``.

    Returns
    -------
    pd.DataFrame
        A single-row DataFrame with 57 named feature columns.
    """
    # Keyword arguments throughout: librosa >= 0.10 made the audio
    # argument keyword-only for most feature functions.
    chroma_shift = librosa.feature.chroma_stft(y=y, n_fft=2048, hop_length=512)  # chroma features
    rmse = librosa.feature.rms(y=y, frame_length=512, hop_length=512)            # RMS energy
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)           # spectral centroid
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)                     # spectral bandwidth
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]           # roll-off frequency
    zcr = librosa.feature.zero_crossing_rate(y, hop_length=512)                  # zero-crossing rate
    y_harm, y_perc = librosa.effects.hpss(y)                                     # harmonic / percussive split
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)                               # tempo estimate (bpm)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)                           # first 20 MFCCs

    features_extracted = np.hstack([
        np.mean(chroma_shift), np.var(chroma_shift),
        np.mean(rmse), np.var(rmse),
        np.mean(spectral_centroids), np.var(spectral_centroids),
        np.mean(spec_bw), np.var(spec_bw),
        np.mean(spectral_rolloff), np.var(spectral_rolloff),
        np.mean(zcr), np.var(zcr),
        np.mean(y_harm), np.var(y_harm),
        np.mean(y_perc), np.var(y_perc),
        tempo,
        np.mean(mfcc.T, axis=0),   # 20 MFCC means
        np.var(mfcc.T, axis=0),    # 20 MFCC variances
    ])
    # 17 scalar features + 2 * 20 MFCC stats = 57 columns.
    features = features_extracted.reshape(1, -1)

    col_names = ['chroma_stft_mean', 'chroma_stft_var', 'rms_mean', 'rms_var',
                 'spectral_centroid_mean', 'spectral_centroid_var', 'spectral_bandwidth_mean',
                 'spectral_bandwidth_var', 'rolloff_mean', 'rolloff_var', 'zero_crossing_rate_mean',
                 'zero_crossing_rate_var', 'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var',
                 'tempo']
    col_names += [f'mfcc{i}_{stat}' for i in range(1, 21) for stat in ('mean', 'var')]

    # NOTE(review): the original also built a reduced copy of the frame
    # (dropping four mfcc variance columns) and a list form of it, but
    # neither was used or returned — dead code removed.
    return pd.DataFrame(features, columns=col_names)
pd.set_option('display.max_columns', None)
# Show every DataFrame column when printing (no column truncation).
# Load 19 song pairs ("N-1.wav" / "N-2.wav") at a fixed 22.05 kHz sample
# rate; each pair is presumably two tracks to be compared for similarity
# (see speed_checker below). All files share the same sr, so the single
# `sr` binding kept after the last load is valid for every clip.
song1_1, sr = librosa.load("1-1.wav", sr = 22050)
song1_2, sr = librosa.load("1-2.wav", sr = 22050)
song2_1, sr = librosa.load("2-1.wav", sr = 22050)
song2_2, sr = librosa.load("2-2.wav", sr = 22050)
song3_1, sr = librosa.load("3-1.wav", sr = 22050)
song3_2, sr = librosa.load("3-2.wav", sr = 22050)
song4_1, sr = librosa.load("4-1.wav", sr = 22050)
song4_2, sr = librosa.load("4-2.wav", sr = 22050)
song5_1, sr = librosa.load("5-1.wav", sr = 22050)
song5_2, sr = librosa.load("5-2.wav", sr = 22050)
song6_1, sr = librosa.load("6-1.wav", sr = 22050)
song6_2, sr = librosa.load("6-2.wav", sr = 22050)
song7_1, sr = librosa.load("7-1.wav", sr = 22050)
song7_2, sr = librosa.load("7-2.wav", sr = 22050)
song8_1, sr = librosa.load("8-1.wav", sr = 22050)
song8_2, sr = librosa.load("8-2.wav", sr = 22050)
song9_1, sr = librosa.load("9-1.wav", sr = 22050)
song9_2, sr = librosa.load("9-2.wav", sr = 22050)
song10_1, sr = librosa.load("10-1.wav", sr = 22050)
song10_2, sr = librosa.load("10-2.wav", sr = 22050)
song11_1, sr = librosa.load("11-1.wav", sr = 22050)
song11_2, sr = librosa.load("11-2.wav", sr = 22050)
song12_1, sr = librosa.load("12-1.wav", sr = 22050)
song12_2, sr = librosa.load("12-2.wav", sr = 22050)
song13_1, sr = librosa.load("13-1.wav", sr = 22050)
song13_2, sr = librosa.load("13-2.wav", sr = 22050)
song14_1, sr = librosa.load("14-1.wav", sr = 22050)
song14_2, sr = librosa.load("14-2.wav", sr = 22050)
song15_1, sr = librosa.load("15-1.wav", sr = 22050)
song15_2, sr = librosa.load("15-2.wav", sr = 22050)
song16_1, sr = librosa.load("16-1.wav", sr = 22050)
song16_2, sr = librosa.load("16-2.wav", sr = 22050)
song17_1, sr = librosa.load("17-1.wav", sr = 22050)
song17_2, sr = librosa.load("17-2.wav", sr = 22050)
song18_1, sr = librosa.load("18-1.wav", sr = 22050)
song18_2, sr = librosa.load("18-2.wav", sr = 22050)
song19_1, sr = librosa.load("19-1.wav", sr = 22050)
song19_2, sr = librosa.load("19-2.wav", sr = 22050)
# Extract the 57-column single-row feature DataFrame for each loaded clip.
feature1_1 = get_features(song1_1, sr = sr)
feature1_2 = get_features(song1_2, sr = sr)
feature2_1 = get_features(song2_1, sr = sr)
feature2_2 = get_features(song2_2, sr = sr)
feature3_1 = get_features(song3_1, sr = sr)
feature3_2 = get_features(song3_2, sr = sr)
feature4_1 = get_features(song4_1, sr = sr)
feature4_2 = get_features(song4_2, sr = sr)
feature5_1 = get_features(song5_1, sr = sr)
feature5_2 = get_features(song5_2, sr = sr)
feature6_1 = get_features(song6_1, sr = sr)
feature6_2 = get_features(song6_2, sr = sr)
feature7_1 = get_features(song7_1, sr = sr)
feature7_2 = get_features(song7_2, sr = sr)
feature8_1 = get_features(song8_1, sr = sr)
feature8_2 = get_features(song8_2, sr = sr)
feature9_1 = get_features(song9_1, sr = sr)
feature9_2 = get_features(song9_2, sr = sr)
feature10_1 = get_features(song10_1, sr = sr)
feature10_2 = get_features(song10_2, sr = sr)
feature11_1 = get_features(song11_1, sr = sr)
feature11_2 = get_features(song11_2, sr = sr)
feature12_1 = get_features(song12_1, sr = sr)
feature12_2 = get_features(song12_2, sr = sr)
feature13_1 = get_features(song13_1, sr = sr)
feature13_2 = get_features(song13_2, sr = sr)
feature14_1 = get_features(song14_1, sr = sr)
feature14_2 = get_features(song14_2, sr = sr)
feature15_1 = get_features(song15_1, sr = sr)
feature15_2 = get_features(song15_2, sr = sr)
feature16_1 = get_features(song16_1, sr = sr)
feature16_2 = get_features(song16_2, sr = sr)
feature17_1 = get_features(song17_1, sr = sr)
feature17_2 = get_features(song17_2, sr = sr)
feature18_1 = get_features(song18_1, sr = sr)
feature18_2 = get_features(song18_2, sr = sr)
feature19_1 = get_features(song19_1, sr = sr)
feature19_2 = get_features(song19_2, sr = sr)
# Stack the 38 single-row feature frames into one DataFrame (one row per clip).
data = pd.concat([feature1_1, feature1_2, feature2_1, feature2_2, feature3_1, feature3_2, feature4_1, feature4_2, feature5_1, feature5_2,
feature6_1, feature6_2, feature7_1, feature7_2, feature8_1, feature8_2, feature9_1, feature9_2, feature10_1, feature10_2,
feature11_1, feature11_2, feature12_1, feature12_2, feature13_1, feature13_2, feature14_1, feature14_2, feature15_1, feature15_2,
feature16_1, feature16_2, feature17_1, feature17_2, feature18_1, feature18_2, feature19_1, feature19_2])
# Drop all 40 MFCC columns (mean+var for mfcc1..mfcc20) — only the
# non-MFCC features are analysed for tempo sensitivity below.
data = data.drop(['mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var',
'mfcc4_mean','mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean',
'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean',
'mfcc12_var', 'mfcc13_mean', 'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean',
'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var'], axis = 1, inplace = False)
mfcc Feature는 모두 drop
# Every concatenated row carried index 0; reset to a clean 0..37 range
# and discard the leftover 'index' column created by reset_index().
data = data.reset_index()
data = data.drop(['index'], axis = 1, inplace = False)
# Relabel each row with its source clip name, e.g. row 0 -> '1-1'.
data.rename(index = {0 : '1-1', 1 : '1-2', 2 : '2-1', 3 : '2-2', 4 : '3-1', 5 : '3-2', 6 : '4-1', 7 : '4-2', 8 : '5-1', 9 : '5-2',
10 : '6-1', 11 : '6-2', 12 : '7-1', 13 : '7-2', 14 : '8-1', 15 : '8-2', 16 : '9-1', 17 : '9-2', 18 : '10-1', 19 : '10-2',
20 : '11-1', 21 : '11-2', 22 : '12-1', 23 : '12-2', 24 : '13-1', 25 : '13-2', 26 : '14-1', 27 : '14-2', 28 : '15-1', 29 : '15-2',
30 : '16-1', 31 : '16-2', 32 : '17-1', 33 : '17-2', 34 : '18-1', 35 : '18-2', 36 : '19-1', 37 : '19-2'}, inplace = True)
# Drop everything except the three features the author found most
# tempo-sensitive: spectral_centroid_mean, zero_crossing_rate_var, perceptr_var.
data = data.drop(["chroma_stft_var", "rms_mean", "rms_var", "spectral_centroid_var", "spectral_bandwidth_var",
"rolloff_mean", "rolloff_var", "tempo", "harmony_mean", "harmony_var", "zero_crossing_rate_mean",
"spectral_bandwidth_mean", "perceptr_mean", "chroma_stft_mean"], axis = 1, inplace = False)
# Display all 38 rows for inspection.
data.head(40)
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Min-max scale the three remaining feature columns to [0, 1] so their
# magnitudes are comparable across clips.
scaler = MinMaxScaler()
data = scaler.fit_transform(np.asarray(data, dtype = float))
# Rebuild a labelled frame from the scaled array and show all rows.
data_df = pd.DataFrame(data, columns = ['spectral_centroid_mean', 'zero_crossing_rate_var', 'perceptr_var'])
data_df.head(40)
perceptr_var, zero_crossing_rate_var, spectral_centroid_mean 가 tempo의 영향을 아주 많이 받는다.
def _speed_score(song, sr):
    """Min-max-scaled tempo proxy for one audio signal.

    Combines the three features found above to track tempo best:
    percussive-component variance, zero-crossing-rate variance and
    mean spectral centroid. The scaling bounds are the min/max
    observed over 10,000 reference tracks (per the author's note).
    """
    _, perc = librosa.effects.hpss(song)                              # percussive component
    zcr = librosa.feature.zero_crossing_rate(song, hop_length = 512)  # zero-crossing rate
    centroid = librosa.feature.spectral_centroid(y=song, sr = sr)     # keyword y: librosa >= 0.10
    perc_scaled = (np.var(perc) - 4.67e-8) / (0.058879 - 4.67e-8)
    zcr_scaled = (np.var(zcr) - 5.02e-6) / (0.065185 - 5.02e-6)
    centroid_scaled = (np.mean(centroid) - 300) / (5432.534 - 300)
    # The original wrapped this sum in np.mean(), a no-op on a scalar.
    return perc_scaled + zcr_scaled + centroid_scaled


def speed_checker(song1, song2, sr):
    """Return (score1, score2): tempo-proxy scores for two signals.

    The original duplicated the per-song computation verbatim for
    each input; it is factored into ``_speed_score``.

    Parameters
    ----------
    song1, song2 : np.ndarray
        Audio time series to compare.
    sr : int
        Sampling rate shared by both signals.

    Returns
    -------
    tuple of float
        One score per input; a higher score indicates a faster track.
    """
    return _speed_score(song1, sr), _speed_score(song2, sr)
Scaling의 Param.은 10000개의 음원데이터의 Max, Min 값을 이용.
표절 논란이 있는 힙합곡을 speed_checker로 확인
표절 논란이 있는 매우 느린 클래식 피아노곡을 speed_checker로 확인
기존에 사용하던 tempo_checker에 비해 성능이 좋은 것으로 확인되었다.
'Project > Music Plagiarism Project' 카테고리의 다른 글
[##midway check-up##] (0) | 2022.11.06 |
---|---|
[Plagiarism Scoring using Librosa's DTW (only time signal)] (1) | 2022.11.06 |
[Plagiarism Scoring using DTW with Librosa's Chroma] (2) | 2022.11.04 |
[Music Genre Classification using LGBMClassifier (##Final##)] (0) | 2022.11.03 |
Define Function : Music Genre Classification (0) | 2022.10.26 |
Comments