[tester(hard_level = 6, sr_level = 100)]
import math
import random

import joblib
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fastdtw import fastdtw
from lightgbm import LGBMClassifier
from scipy import signal, stats
from scipy.spatial.distance import euclidean
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def Devider(A, B, sr, period, mode):
    # A      : first song of the alleged plagiarism pair
    # B      : second song of the alleged plagiarism pair
    # sr     : sampling rate of A and B
    # period : window length in seconds
    # mode   : 1 -> use the full length, 2 -> use only the first half

    # ===== Trim both songs to the same length =====
    length = int(min(len(A.tolist()), len(B.tolist())) / mode)
    A = A[0:length]
    B = B[0:length]

    # ===== How many windows the data will be divided into =====
    num_of_window = math.floor(length / (period * sr))
    final_index_A = 0
    final_index_B = 0

    # ===== Store the divided data of each song as 2D arrays Mat_A, Mat_B =====
    Mat_A = [[0 for col in range(int(period * sr))] for row in range(int(num_of_window))]
    Mat_B = [[0 for col in range(int(period * sr))] for row in range(int(num_of_window))]
    for i in range(0, num_of_window):
        for j in range(0, int(period * sr)):
            Mat_A[i][j] = A[final_index_A + j]
            Mat_B[i][j] = B[final_index_B + j]
        final_index_A = int(period * sr) * (i + 1)
        final_index_B = int(period * sr) * (i + 1)
    return Mat_A, Mat_B
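A minimal sketch of what Devider returns, using synthetic noise in place of real audio (the 23 s / 31 s lengths below are arbitrary):

import numpy as np

sr = 22050
A = np.random.randn(sr * 23)   # 23 s of noise standing in for song 1
B = np.random.randn(sr * 31)   # 31 s of noise standing in for song 2

Mat_A, Mat_B = Devider(A, B, sr=sr, period=5, mode=1)
print(len(Mat_A), len(Mat_A[0]))   # 4 windows, each 5 * 22050 = 110250 samples long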
def extractor(A, area, sr):
    # area example: "01:31.5 ~ 02:18.3" (MM:SS.s ~ MM:SS.s, parsed by fixed character position)
    area = str(area)
    start = int(area[0]) * 600 + int(area[1]) * 60 + int(area[3]) * 10 + int(area[4]) * 1 + int(area[6]) / 10
    end = int(area[10]) * 600 + int(area[11]) * 60 + int(area[13]) * 10 + int(area[14]) * 1 + int(area[16]) / 10
    A_cut = A[int(start * sr):int(end * sr)]
    return A_cut
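For example, a quick check of the timestamp parsing, with silence standing in for a real recording:

import numpy as np

sr = 22050
song = np.zeros(sr * 180)                          # 3 minutes of silence as a stand-in
clip = extractor(song, "01:31.5 ~ 02:18.3", sr=sr)
# 01:31.5 -> 91.5 s and 02:18.3 -> 138.3 s, so the clip is about 46.8 s long
print(round(len(clip) / sr, 2))                    # 46.8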
def speed_checker(input):
    # Despite the name, this returns the standard deviation of the spectral flatness
    # (scaled by 100), not a tempo in BPM.
    input_flatness = librosa.feature.spectral_flatness(y=input)
    tempo_final = np.std(input_flatness[0])
    return tempo_final * 100
def FastDTW(A, B):
    # Approximate DTW distance between two sequences; the warping path is discarded.
    distance, path = fastdtw(A, B, dist=euclidean)
    return distance
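A small sketch of what this wrapper returns for two short, slightly different sequences (fastdtw and scipy assumed installed, as above):

import numpy as np

a = np.array([0.0, 1.0, 2.0, 3.0])
b = np.array([0.0, 1.0, 1.0, 2.0, 3.0])   # same shape of curve, one repeated sample
print(FastDTW(a, b))                      # ~0: the extra sample is absorbed by the warping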
def get_features(y, sr):  # Feature extraction for the genre (label) data
    chroma_shift = librosa.feature.chroma_stft(y=y, n_fft=2048, hop_length=512)    # chroma features
    rmse = librosa.feature.rms(y=y, frame_length=512, hop_length=512)              # RMS energy
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)             # spectral centroid
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)                       # spectral bandwidth
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]             # roll-off
    zcr = librosa.feature.zero_crossing_rate(y, hop_length=512)                    # zero-crossing rate
    y_harm, y_perc = librosa.effects.hpss(y)                                       # harmonic / percussive components
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)                                 # tempo
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)                             # first 20 MFCCs
    features_extracted = np.hstack([
        np.mean(chroma_shift), np.var(chroma_shift),
        np.mean(rmse), np.var(rmse),
        np.mean(spectral_centroids), np.var(spectral_centroids),
        np.mean(spec_bw), np.var(spec_bw),
        np.mean(spectral_rolloff), np.var(spectral_rolloff),
        np.mean(zcr), np.var(zcr),
        np.mean(y_harm), np.var(y_harm),
        np.mean(y_perc), np.var(y_perc),
        tempo,
        np.mean(mfcc.T, axis=0),
        np.var(mfcc.T, axis=0)
    ])
    features = features_extracted.reshape(1, 57)
    col_names = ['chroma_stft_mean', 'chroma_stft_var', 'rms_mean', 'rms_var',
                 'spectral_centroid_mean', 'spectral_centroid_var', 'spectral_bandwidth_mean',
                 'spectral_bandwidth_var', 'rolloff_mean', 'rolloff_var', 'zero_crossing_rate_mean',
                 'zero_crossing_rate_var', 'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var',
                 'tempo', 'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var',
                 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean',
                 'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean',
                 'mfcc12_var', 'mfcc13_mean', 'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean',
                 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var']
    df = pd.DataFrame(features, columns=col_names)
    # The reduced frame and list are computed but not used; the full 57-column frame is returned.
    features = df.drop(["mfcc13_var", "mfcc17_var", "mfcc14_var", "mfcc11_var"], axis=1, inplace=False)
    array = (np.array(features)).tolist()
    return df
def LPF(data, sr, cutoff):
    # 101-tap FIR low-pass filter (window method)
    b = signal.firwin(101, cutoff=cutoff, fs=sr, pass_zero='lowpass')
    data1 = signal.lfilter(b, [1.0], data)
    return data1


def HPF(data, sr, cutoff):
    # 101-tap FIR high-pass filter (window method)
    b = signal.firwin(101, cutoff=cutoff, fs=sr, pass_zero='highpass')
    data1 = signal.lfilter(b, [1.0], data)
    return data1
def show_spec(data, sr):
    # Plot the magnitude spectrum (left half only, up to the Nyquist frequency)
    fft = np.fft.fft(data)
    magnitude = np.abs(fft)
    f = np.linspace(0, sr, len(magnitude))
    left_spectrum = magnitude[:int(len(magnitude) / 2)]
    left_f = f[:int(len(magnitude) / 2)]
    plt.figure(figsize=(20, 10))
    plt.plot(left_f, left_spectrum)
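For instance, a pure 440 Hz tone should show up as a single spike (a matplotlib display backend is assumed):

import numpy as np
import matplotlib.pyplot as plt

sr = 22050
t = np.arange(sr) / sr                  # 1 second of samples
tone = np.sin(2 * np.pi * 440 * t)      # 440 Hz sine
show_spec(tone, sr)                     # single peak at 440 Hz
plt.show()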
def chroma_score(song1, song1_plag_area, song2, song2_plag_area, sr, rand_num):
    # TODO: support several modes later.
    # "random" mode compares against randomly chosen segments; a "window" mode would
    # slide a window over the song periodically.
    song1_ext = np.array(extractor(song1, song1_plag_area, sr=sr))
    song2_ext = np.array(extractor(song2, song2_plag_area, sr=sr))
    song1_chroma = librosa.feature.chroma_stft(y=song1_ext, sr=sr)
    song2_chroma = librosa.feature.chroma_stft(y=song2_ext, sr=sr)

    # ================================ Random offsets for song1 ================================
    song1_start = int(song1_plag_area[0]) * 600 + int(song1_plag_area[1]) * 60 + int(song1_plag_area[3]) * 10 + int(song1_plag_area[4]) * 1 + int(song1_plag_area[6]) / 10
    song1_end = int(song1_plag_area[10]) * 600 + int(song1_plag_area[11]) * 60 + int(song1_plag_area[13]) * 10 + int(song1_plag_area[14]) * 1 + int(song1_plag_area[16]) / 10
    song1_len = int(len(song1) / sr)
    song1_len_plag = int(song1_end - song1_start)
    rand_range = song1_len - song1_len_plag
    song1_rand_saver = random.sample(range(0, rand_range), rand_num)
    cnt_1 = 0
    # Re-draw any offset that overlaps the flagged section
    for i in range(0, rand_num):
        while abs(song1_rand_saver[i] - int(song1_start)) <= int(song1_len_plag / 2):
            new_rand = random.sample(range(0, rand_range), 1)
            song1_rand_saver[i] = new_rand[0]
            cnt_1 += 1

    # ================================ Random offsets for song2 ================================
    song2_start = int(song2_plag_area[0]) * 600 + int(song2_plag_area[1]) * 60 + int(song2_plag_area[3]) * 10 + int(song2_plag_area[4]) * 1 + int(song2_plag_area[6]) / 10
    song2_end = int(song2_plag_area[10]) * 600 + int(song2_plag_area[11]) * 60 + int(song2_plag_area[13]) * 10 + int(song2_plag_area[14]) * 1 + int(song2_plag_area[16]) / 10
    song2_len = int(len(song2) / sr)
    song2_len_plag = int(song2_end - song2_start)
    rand_range = song2_len - song2_len_plag
    song2_rand_saver = random.sample(range(0, rand_range), rand_num)
    cnt_2 = 0
    for i in range(0, rand_num):
        while abs(song2_rand_saver[i] - int(song2_start)) <= int(song2_len_plag / 2):
            new_rand = random.sample(range(0, rand_range), 1)
            song2_rand_saver[i] = new_rand[0]
            cnt_2 += 1

    # ================================ Slice the random segments ================================
    # Each random segment has the same length (in seconds) as the flagged section.
    song1_rand_data = [song1[s * sr:(s + song1_len_plag) * sr] for s in song1_rand_saver]
    song2_rand_data = [song2[s * sr:(s + song2_len_plag) * sr] for s in song2_rand_saver]

    # ================================ Chroma features of every random segment ================================
    song1_rand_chroma = np.array([librosa.feature.chroma_stft(y=d, sr=sr) for d in song1_rand_data])
    song2_rand_chroma = np.array([librosa.feature.chroma_stft(y=d, sr=sr) for d in song2_rand_data])

    # ================================ Compare the chroma data ================================
    col_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    # Row 0: DTW distance per pitch class between the two flagged sections
    song1_vs_song2 = []
    for i in range(0, 12):
        song1_vs_song2.append(FastDTW(song1_chroma[i], song2_chroma[i]))
    song1_vs_song2 = np.array(song1_vs_song2)
    save_df = pd.DataFrame([song1_vs_song2], columns=col_names)

    # Rows 1..rand_num: flagged section of song1 vs random segments of song2
    for i in range(0, rand_num):
        save_list = []
        for j in range(0, 12):
            save_list.append(FastDTW(song1_chroma[j], song2_rand_chroma[i][j]))
        save_df = pd.concat([save_df, pd.DataFrame([np.array(save_list)], columns=col_names)])

    # Remaining rows: flagged section of song2 vs random segments of song1
    for i in range(0, rand_num):
        save_list = []
        for j in range(0, 12):
            save_list.append(FastDTW(song2_chroma[j], song1_rand_chroma[i][j]))
        save_df = pd.concat([save_df, pd.DataFrame([np.array(save_list)], columns=col_names)])

    save_df.rename(columns={'C': 0, 'C#': 1, 'D': 2, 'D#': 3,
                            'E': 4, 'F': 5, 'F#': 6, 'G': 7,
                            'G#': 8, 'A': 9, 'A#': 10, 'B': 11}, inplace=True)
    # For each pitch class, rank all rows by distance and accumulate the rank of row 0 (the flagged pair)
    score = 0
    save_df = save_df.round(2)
    save_df = save_df.reset_index(drop=True)
    for i in range(0, 12):
        order = 0
        save_df = save_df.sort_values(by=i)
        index_saver = save_df.index
        for j in range(0, len(index_saver)):
            order += 1
            if index_saver[j] == 0:
                score += order
                break
    # Best case: rank 1 in all 12 pitch classes -> 100
    score = 100 - ((score - 12) / ((rand_num * 2 + 1) * 12) * 100)
    save_df.rename(columns={0: 'C', 1: 'C#', 2: 'D', 3: 'D#',
                            4: 'E', 5: 'F', 6: 'F#', 7: 'G',
                            8: 'G#', 9: 'A', 10: 'A#', 11: 'B'}, inplace=True)
    save_df = save_df.sort_index(ascending=True)
    return round(score, 2)
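To make the rank-based scoring concrete, here are the best- and worst-case values of the formula above for hard_level = 6, the setting used in this post:

rand_num = 6                              # hard_level used in this post
rows = 2 * rand_num + 1                   # flagged pair + 2 * 6 random comparisons
best, worst = 12 * 1, 12 * rows           # summed rank of the flagged pair over 12 pitch classes
print(100 - ((best - 12) / (rows * 12) * 100))    # 100.0
print(100 - ((worst - 12) / (rows * 12) * 100))   # ~7.69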
def check_genre(song, sr, model):
    # Feature names after dropping the mfcc11/13/14/17 variances
    col_names_drop = ['chroma_stft_mean', 'chroma_stft_var',
                      'rms_mean', 'rms_var',
                      'spectral_centroid_mean', 'spectral_centroid_var',
                      'spectral_bandwidth_mean', 'spectral_bandwidth_var',
                      'rolloff_mean', 'rolloff_var',
                      'zero_crossing_rate_mean', 'zero_crossing_rate_var',
                      'harmony_mean', 'harmony_var',
                      'perceptr_mean', 'perceptr_var',
                      'tempo',
                      'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var', 'mfcc4_mean',
                      'mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean',
                      'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc12_mean', 'mfcc12_var',
                      'mfcc13_mean', 'mfcc14_mean', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean', 'mfcc16_var',
                      'mfcc17_mean', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var']
    # Full 57-feature column names
    col_names = ['chroma_stft_mean', 'chroma_stft_var', 'rms_mean', 'rms_var',
                 'spectral_centroid_mean', 'spectral_centroid_var', 'spectral_bandwidth_mean',
                 'spectral_bandwidth_var', 'rolloff_mean', 'rolloff_var', 'zero_crossing_rate_mean',
                 'zero_crossing_rate_var', 'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var',
                 'tempo', 'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var',
                 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean',
                 'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean',
                 'mfcc12_var', 'mfcc13_mean', 'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean',
                 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var']
    data_3sec = pd.read_csv(r"source/features_3_sec.csv")
    data_30sec = pd.read_csv(r"source/features_30_sec.csv")
    data = pd.concat([data_3sec, data_30sec])
    X = data.drop("label", axis=1)
    X_droped = X.drop(["filename", "length", "mfcc13_var", "mfcc17_var", "mfcc14_var", "mfcc11_var"], axis=1, inplace=False)

    # Extract the same feature set for the input song
    chroma_shift = librosa.feature.chroma_stft(y=song, n_fft=2048, hop_length=512)   # chroma features
    rmse = librosa.feature.rms(y=song, frame_length=512, hop_length=512)             # RMS energy
    spectral_centroids = librosa.feature.spectral_centroid(y=song, sr=sr)            # spectral centroid
    spec_bw = librosa.feature.spectral_bandwidth(y=song, sr=sr)                      # spectral bandwidth
    spectral_rolloff = librosa.feature.spectral_rolloff(y=song, sr=sr)[0]            # roll-off
    zcr = librosa.feature.zero_crossing_rate(song, hop_length=512)                   # zero-crossing rate
    y_harm, y_perc = librosa.effects.hpss(song)                                      # harmonic / percussive components
    tempo, _ = librosa.beat.beat_track(y=song, sr=sr)                                # tempo
    mfcc = librosa.feature.mfcc(y=song, sr=sr, n_mfcc=20)                            # first 20 MFCCs
    features_extracted = np.hstack([
        np.mean(chroma_shift), np.var(chroma_shift),
        np.mean(rmse), np.var(rmse),
        np.mean(spectral_centroids), np.var(spectral_centroids),
        np.mean(spec_bw), np.var(spec_bw),
        np.mean(spectral_rolloff), np.var(spectral_rolloff),
        np.mean(zcr), np.var(zcr),
        np.mean(y_harm), np.var(y_harm),
        np.mean(y_perc), np.var(y_perc),
        tempo,
        np.mean(mfcc.T, axis=0),
        np.var(mfcc.T, axis=0)
    ])
    features = features_extracted.reshape(1, 57)
    input_df = pd.DataFrame(features, columns=col_names)
    input_df = input_df.drop(["mfcc13_var", "mfcc17_var", "mfcc14_var", "mfcc11_var"], axis=1)

    # Scale the input together with the training data so both use the same statistics
    df_concated = pd.concat([X_droped, input_df], axis=0)
    ss = StandardScaler()
    concat_scaled = ss.fit_transform(np.array(df_concated.iloc[:, :], dtype=float))
    concat_df = pd.DataFrame(concat_scaled, columns=col_names_drop)
    input_df = concat_df.iloc[-1]
    input_df = pd.Series.to_frame(input_df)
    input_arr = input_df.to_numpy()
    input_arr = input_arr.reshape(1, 53)
    input_df = pd.DataFrame(input_arr, columns=col_names_drop)

    prediction = model.predict(input_df)
    # Genre classes 0..9 in label order
    genres = ["blues", "classical", "country", "disco", "hiphop",
              "jazz", "metal", "pop", "reggae", "rock"]
    answer = genres[int(prediction[0])]   # human-readable name (not used below)
    return prediction[0]
def Data_PreProcessing(data1, data2):
    data1 = pd.read_csv(data1)
    data2 = pd.read_csv(data2)
    data = pd.concat([data1, data2])
    X = data.drop("label", axis=1)
    y = data.iloc[:, -1]
    cvt = preprocessing.LabelEncoder()
    y_encoded = cvt.fit_transform(y)
    X_droped = X.drop(["filename", "length", "mfcc13_var", "mfcc17_var", "mfcc14_var", "mfcc11_var"], axis=1, inplace=False)
    ss = StandardScaler()
    X_scaled = ss.fit_transform(np.array(X_droped.iloc[:, :], dtype=float))
    X_df = pd.DataFrame(X_scaled, columns=['chroma_stft_mean', 'chroma_stft_var',
                                           'rms_mean', 'rms_var',
                                           'spectral_centroid_mean', 'spectral_centroid_var',
                                           'spectral_bandwidth_mean', 'spectral_bandwidth_var',
                                           'rolloff_mean', 'rolloff_var',
                                           'zero_crossing_rate_mean', 'zero_crossing_rate_var',
                                           'harmony_mean', 'harmony_var',
                                           'perceptr_mean', 'perceptr_var',
                                           'tempo',
                                           'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var',
                                           'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var',
                                           'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean', 'mfcc8_var', 'mfcc9_mean', 'mfcc9_var',
                                           'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc12_mean', 'mfcc12_var',
                                           'mfcc13_mean', 'mfcc14_mean', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean', 'mfcc16_var',
                                           'mfcc17_mean', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var'])
    y_df = pd.DataFrame(y_encoded, columns=['target'])
    X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size=0.2, random_state=156, shuffle=True)
    evals = [(X_test, y_test)]
    return X_train, X_test, y_train, y_test, evals
def Make_Model(X_train, X_test, y_train, y_test, evals):
    # Note: gamma, booster and scoring are XGBoost-style arguments; LightGBM does not use
    # them (it typically only warns). The verbose argument of fit() is version-dependent.
    model = LGBMClassifier(learning_rate=0.06, n_estimators=781, max_depth=11, min_child_weight=1,
                           gamma=0, subsample=1, colsample_bytree=1, min_data_in_leaf=28, num_leaves=19,
                           booster='gbtree', importance_type='gain', scoring="accuracy", n_jobs=-1, random_state=777)
    model.fit(X_train, y_train, eval_set=evals, eval_metric="logloss", verbose=0)
    return model
def load_data(song1, song2, sr):
    song1_data, sr = librosa.load(song1, sr=sr)
    song2_data, sr = librosa.load(song2, sr=sr)
    return song1_data, song2_data, sr
def tempo_score(song1, song2, sr):
    # Each song gets a single rhythm/texture score built from the percussive variance,
    # the zero-crossing-rate variance and the mean spectral centroid, each min-max scaled
    # with fixed bounds; the final score is 100 minus the scaled difference between the songs.
    song1_harm, song1_perc = librosa.effects.hpss(song1)
    song1_zcr = librosa.feature.zero_crossing_rate(song1, hop_length=512)
    song1_spectral_centroid = librosa.feature.spectral_centroid(y=song1, sr=sr)
    song1_perceptr_var = np.var(song1_perc)
    song1_zcr_var = np.var(song1_zcr)
    song1_spectral_centroid_mean = np.mean(song1_spectral_centroid)
    song1_perceptr_var_scaled = (song1_perceptr_var - 4.67 * (10 ** -8)) / (0.058879 - 4.67 * (10 ** -8))
    song1_zcr_var_scaled = (song1_zcr_var - 5.02 * (10 ** -6)) / (0.065185 - 5.02 * (10 ** -6))
    song1_spectral_centroid_scaled = (song1_spectral_centroid_mean - 300) / (5432.534 - 300)
    song1_score = np.mean(song1_perceptr_var_scaled + song1_zcr_var_scaled + song1_spectral_centroid_scaled)

    song2_harm, song2_perc = librosa.effects.hpss(song2)
    song2_zcr = librosa.feature.zero_crossing_rate(song2, hop_length=512)
    song2_spectral_centroid = librosa.feature.spectral_centroid(y=song2, sr=sr)
    song2_perceptr_var = np.var(song2_perc)
    song2_zcr_var = np.var(song2_zcr)
    song2_spectral_centroid_mean = np.mean(song2_spectral_centroid)
    song2_perceptr_var_scaled = (song2_perceptr_var - 4.67 * (10 ** -8)) / (0.058879 - 4.67 * (10 ** -8))
    song2_zcr_var_scaled = (song2_zcr_var - 5.02 * (10 ** -6)) / (0.065185 - 5.02 * (10 ** -6))
    song2_spectral_centroid_scaled = (song2_spectral_centroid_mean - 300) / (5432.534 - 300)
    song2_score = np.mean(song2_perceptr_var_scaled + song2_zcr_var_scaled + song2_spectral_centroid_scaled)

    song1_score = round(song1_score, 3)
    song2_score = round(song2_score, 3)
    tempo_score = round(100 - ((abs(song1_score - song2_score) / 1.2) * 100), 2)
    return tempo_score
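A rough illustration of the final step; the two per-song scores below are made-up values, not measurements:

song1_score, song2_score = 0.52, 0.31    # hypothetical combined scaled scores
print(round(100 - ((abs(song1_score - song2_score) / 1.2) * 100), 2))   # 82.5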
def dtw_score(song1, song1_plag_area, song2, song2_plag_area, sr, rand_num):
    # TODO: support several modes later.
    # "random" mode compares against randomly chosen segments; a "window" mode would
    # slide a window over the song periodically.
    song1_ext = np.array(extractor(song1, song1_plag_area, sr=sr))
    song2_ext = np.array(extractor(song2, song2_plag_area, sr=sr))

    # ================================ Random offsets for song1 ================================
    song1_start = int(song1_plag_area[0]) * 600 + int(song1_plag_area[1]) * 60 + int(song1_plag_area[3]) * 10 + int(song1_plag_area[4]) * 1 + int(song1_plag_area[6]) / 10
    song1_end = int(song1_plag_area[10]) * 600 + int(song1_plag_area[11]) * 60 + int(song1_plag_area[13]) * 10 + int(song1_plag_area[14]) * 1 + int(song1_plag_area[16]) / 10
    song1_len = int(len(song1) / sr)
    song1_len_plag = int(song1_end - song1_start)
    rand_range = song1_len - song1_len_plag
    song1_rand_saver = random.sample(range(0, rand_range), rand_num)
    cnt_1 = 0
    # Re-draw any offset that overlaps the flagged section
    for i in range(0, rand_num):
        while abs(song1_rand_saver[i] - int(song1_start)) <= int(song1_len_plag / 2):
            new_rand = random.sample(range(0, rand_range), 1)
            song1_rand_saver[i] = new_rand[0]
            cnt_1 += 1

    # ================================ Random offsets for song2 ================================
    song2_start = int(song2_plag_area[0]) * 600 + int(song2_plag_area[1]) * 60 + int(song2_plag_area[3]) * 10 + int(song2_plag_area[4]) * 1 + int(song2_plag_area[6]) / 10
    song2_end = int(song2_plag_area[10]) * 600 + int(song2_plag_area[11]) * 60 + int(song2_plag_area[13]) * 10 + int(song2_plag_area[14]) * 1 + int(song2_plag_area[16]) / 10
    song2_len = int(len(song2) / sr)
    song2_len_plag = int(song2_end - song2_start)
    rand_range = song2_len - song2_len_plag
    song2_rand_saver = random.sample(range(0, rand_range), rand_num)
    cnt_2 = 0
    for i in range(0, rand_num):
        while abs(song2_rand_saver[i] - int(song2_start)) <= int(song2_len_plag / 2):
            new_rand = random.sample(range(0, rand_range), 1)
            song2_rand_saver[i] = new_rand[0]
            cnt_2 += 1

    # ================================ Slice the random segments ================================
    song1_rand_data = [song1[s * sr:(s + song1_len_plag) * sr] for s in song1_rand_saver]
    song2_rand_data = [song2[s * sr:(s + song2_len_plag) * sr] for s in song2_rand_saver]

    # ================================ DTW scoring ================================
    # Distance of the flagged pair first, then every flagged-vs-random combination
    saver = []
    order = 0
    song1song2 = FastDTW(song1_ext, song2_ext)
    saver.append(song1song2)
    for i in range(0, rand_num):
        saver.append(FastDTW(song1_ext, song2_rand_data[i]))
        saver.append(FastDTW(song2_ext, song1_rand_data[i]))
    save_df = pd.DataFrame(saver, columns=['distance'])
    save_df = save_df.sort_values(by='distance')
    index_saver = save_df.index
    # The score is the inverted rank of the flagged pair among all 2 * rand_num + 1 distances
    for i in range(0, len(index_saver)):
        order += 1
        if index_saver[i] == 0:
            break
    score = round(100 - ((order / (2 * rand_num + 1)) * 100), 2)
    return score
def same_checker(song1, song2, sr):
    # Pearson correlation of seconds 3-6 of both waveforms; a value near 1 means the same recording
    pearson, _ = stats.pearsonr(np.array(song1[sr * 3:sr * 6]), np.array(song2[sr * 3:sr * 6]))
    return abs(pearson)
def genre_score(song1, song2, sr, model):
    # Genre-affinity table (the diagonal is 10, so two identical predicted genres score 100)
    conf_list = [[10, 2, 2, 2, 3, 1, 3, 0, 2, 4],
                 [0, 10, 0, 0, 0, 3, 0, 0, 0, 0],
                 [2, 0, 10, 1, 0, 6, 1, 1, 3, 2],
                 [1, 3, 1, 10, 4, 2, 1, 4, 1, 6],
                 [0, 0, 3, 2, 10, 0, 1, 5, 2, 1],
                 [4, 9, 7, 0, 0, 10, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0, 10, 0, 0, 2],
                 [0, 1, 1, 2, 3, 0, 0, 10, 5, 2],
                 [0, 2, 3, 3, 4, 1, 0, 3, 10, 1],
                 [3, 1, 5, 1, 2, 2, 4, 1, 3, 10]]
    conf_arr = np.array(conf_list)
    song1_genre = check_genre(song1, sr=sr, model=model)
    song2_genre = check_genre(song2, sr=sr, model=model)
    genre_score = round(((conf_arr[song1_genre][song2_genre] + conf_arr[song2_genre][song1_genre]) / 20) * 100, 2)
    return genre_score
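Reading two entries out of the table makes the scoring rule concrete (genres 4 and 7 are hiphop and pop in check_genre's label order):

# Identical predicted genres (diagonal entries): (10 + 10) / 20 * 100 = 100.0
# Genre 4 (hiphop) vs genre 7 (pop):             (5 + 3) / 20 * 100  = 40.0
print(round(((10 + 10) / 20) * 100, 2), round(((5 + 3) / 20) * 100, 2))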
def run_algo(song1, song1_plag_area, song2, song2_plag_area, sr, model, hard, sr_setting):
    # hard : number of random comparison segments per song (e.g. 7)
    same_score = same_checker(song1, song2, sr)
    if same_score < 0.9:
        # print("Starting the check.\n")
        genre = genre_score(song1, song2, sr, model)
        print("Genre Score : {} points".format(genre))
        chroma = chroma_score(song1, song1_plag_area, song2, song2_plag_area, sr, rand_num=hard)
        print("Chroma Score : {} points".format(chroma))
        tempo = tempo_score(song1, song2, sr=sr)
        print("Tempo Score : {} points".format(tempo))
        # Down-sample both songs to sr_setting before the DTW comparison
        song1 = librosa.resample(song1, orig_sr=sr, target_sr=sr_setting)
        song2 = librosa.resample(song2, orig_sr=sr, target_sr=sr_setting)
        dtw = dtw_score(song1, song1_plag_area, song2, song2_plag_area, sr=sr_setting, rand_num=hard)
        print("DTW Score : {} points".format(dtw))
        final_score = round((dtw * 45 + chroma * 30 + tempo * 15 + genre * 10) / 100, 2)
        print("Plagiarism Score : {} points\n".format(final_score))
    else:
        print("The two inputs are the same song.")
        final_score = None   # no plagiarism score is computed for identical inputs
    return final_score
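The 45/30/15/10 weighting can be checked by hand against the first result block further down:

genre, chroma, tempo, dtw = 40.0, 98.08, 77.0, 92.31    # first plagiarism pair below
print(round((dtw * 45 + chroma * 30 + tempo * 15 + genre * 10) / 100, 2))   # 86.51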
def tester(hard_level, sr_level):
    # Song indices in source/ that have a labelled plagiarism section
    ids = [1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 16]
    plag_areas = {1: (plag_1_1, plag_1_2), 2: (plag_2_1, plag_2_2), 3: (plag_3_1, plag_3_2),
                  5: (plag_5_1, plag_5_2), 6: (plag_6_1, plag_6_2), 7: (plag_7_1, plag_7_2),
                  9: (plag_9_1, plag_9_2), 10: (plag_10_1, plag_10_2), 11: (plag_11_1, plag_11_2),
                  12: (plag_12_1, plag_12_2), 13: (plag_13_1, plag_13_2), 14: (plag_14_1, plag_14_2),
                  16: (plag_16_1, plag_16_2)}

    saver = []       # scores for the known plagiarism pairs (N-1.wav vs N-2.wav)
    saver_rand = []  # scores for mismatched pairs (N-1.wav vs the next song's -2.wav)

    # Known plagiarism pairs
    for n in ids:
        song1, song2, sr = load_data(r"source/{}-1.wav".format(n), r"source/{}-2.wav".format(n), sr=22050)
        fin_score = run_algo(song1, plag_areas[n][0], song2, plag_areas[n][1], sr,
                             model=model, hard=hard_level, sr_setting=sr_level)
        saver.append(fin_score)

    # Mismatched ("random") pairs, wrapping 16-1 back around to 1-2
    for a, b in zip(ids, ids[1:] + ids[:1]):
        song1, song2, sr = load_data(r"source/{}-1.wav".format(a), r"source/{}-2.wav".format(b), sr=22050)
        fin_score = run_algo(song1, plag_areas[a][0], song2, plag_areas[b][1], sr,
                             model=model, hard=hard_level, sr_setting=sr_level)
        saver_rand.append(fin_score)

    print("Plagiarism pairs")
    print(saver)
    print("\n\nRandom pairs")
    print(saver_rand)
    return saver, saver_rand
plag_1_1 = "00:00.0 ~ 00:07.5"
plag_1_2 = "00:00.0 ~ 00:07.5"
plag_2_1 = "00:30.7 ~ 00:39.0"
plag_2_2 = "00:42.7 ~ 00:51.0"
plag_3_1 = "00:39.5 ~ 00:52.5"
plag_3_2 = "02:22.3 ~ 02:35.3"
plag_5_1 = "01:09.2 ~ 01:15.5"
plag_5_2 = "01:03.5 ~ 01:10.0"
plag_6_1 = "00:12.3 ~ 00:25.5"
plag_6_2 = "00:41.9 ~ 00:54.3"
plag_7_1 = "00:39.5 ~ 00:59.5"
plag_7_2 = "02:22.3 ~ 02:42.3"
plag_9_1 = "00:16.3 ~ 00:23.3"
plag_9_2 = "00:00.0 ~ 00:09.1"
plag_10_1 = "01:04.0 ~ 01:08.4"
plag_10_2 = "00:58.4 ~ 01:03.0"
plag_11_1 = "00:29.8 ~ 00:36.4"
plag_11_2 = "00:12.9 ~ 00:21.0"
plag_12_1 = "00:31.7 ~ 00:37.3"
plag_12_2 = "00:46.5 ~ 00:53.1"
plag_13_1 = "00:00.0 ~ 00:12.5"
plag_13_2 = "00:03.4 ~ 00:16.3"
plag_14_1 = "00:02.2 ~ 00:10.8"
plag_14_2 = "00:19.2 ~ 00:28.1"
plag_16_1 = "00:30.2 ~ 00:37.3"
plag_16_2 = "00:29.1 ~ 00:36.3"
### Train data, test data, evaluation data
X_train, X_test, y_train, y_test, evals = Data_PreProcessing(r"source/features_3_sec.csv", r"source/features_30_sec.csv")
### Make the ML model (LGBMClassifier)
# model = Make_Model(X_train, X_test, y_train, y_test, evals)
### Load the saved ML model (LGBMClassifier)
model = joblib.load(r'source/my_model.pkl')
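The results below come from the call in the post title:

saver, saver_rand = tester(hard_level = 6, sr_level = 100)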
Per-pair scores from tester(hard_level = 6, sr_level = 100); all scores are points out of 100.

Known plagiarism pairs:

| Pair | Genre | Chroma | Tempo | DTW | Plagiarism |
|---|---|---|---|---|---|
| 1-1 vs 1-2 | 40.0 | 98.08 | 77.0 | 92.31 | 86.51 |
| 2-1 vs 2-2 | 100.0 | 53.85 | 97.75 | 23.08 | 51.2 |
| 3-1 vs 3-2 | 100.0 | 62.82 | 83.83 | 92.31 | 82.96 |
| 5-1 vs 5-2 | 100.0 | 60.26 | 93.08 | 53.85 | 66.27 |
| 6-1 vs 6-2 | 40.0 | 60.26 | 87.75 | 76.92 | 69.85 |
| 7-1 vs 7-2 | 0.0 | 67.95 | 70.0 | 69.23 | 62.04 |
| 9-1 vs 9-2 | 30.0 | 53.21 | 82.17 | 15.38 | 38.21 |
| 10-1 vs 10-2 | 40.0 | 44.87 | 73.17 | 61.54 | 56.13 |
| 11-1 vs 11-2 | 40.0 | 47.44 | 96.83 | 46.15 | 53.52 |
| 12-1 vs 12-2 | 0.0 | 50.64 | 81.58 | 30.77 | 41.28 |
| 13-1 vs 13-2 | 100.0 | 62.82 | 66.58 | 30.77 | 52.68 |
| 14-1 vs 14-2 | 0.0 | 81.41 | 95.25 | 84.62 | 76.79 |
| 16-1 vs 16-2 | 100.0 | 68.59 | 61.25 | 61.54 | 67.46 |

Random (mismatched) pairs:

| Pair | Genre | Chroma | Tempo | DTW | Plagiarism |
|---|---|---|---|---|---|
| 1-1 vs 2-2 | 100.0 | 27.56 | 79.17 | 7.69 | 33.6 |
| 2-1 vs 3-2 | 100.0 | 51.28 | 94.42 | 38.46 | 56.85 |
| 3-1 vs 5-2 | 0.0 | 53.85 | 94.42 | 76.92 | 64.93 |
| 5-1 vs 6-2 | 0.0 | 48.72 | 74.58 | 69.23 | 56.96 |
| 6-1 vs 7-2 | 0.0 | 50.64 | 63.0 | 38.46 | 41.95 |
| 7-1 vs 9-2 | 30.0 | 58.33 | 94.25 | 53.85 | 58.87 |
| 9-1 vs 10-2 | 100.0 | 57.05 | 86.42 | 61.54 | 67.77 |
| 10-1 vs 11-2 | 40.0 | 46.15 | 79.33 | 38.46 | 47.05 |
| 11-1 vs 12-2 | 0.0 | 48.08 | 79.25 | 23.08 | 36.7 |
| 12-1 vs 13-2 | 40.0 | 66.03 | 83.25 | 69.23 | 67.45 |
| 13-1 vs 14-2 | 0.0 | 64.74 | 69.25 | 84.62 | 67.89 |
| 14-1 vs 16-2 | 100.0 | 28.21 | 97.83 | 46.15 | 53.9 |
| 16-1 vs 1-2 | 40.0 | 26.28 | 59.92 | 92.31 | 62.41 |

Plagiarism pairs
[86.51, 51.2, 82.96, 66.27, 69.85, 62.04, 38.21, 56.13, 53.52, 41.28, 52.68, 76.79, 67.46]

Random pairs
[33.6, 56.85, 64.93, 56.96, 41.95, 58.87, 67.77, 47.05, 36.7, 67.45, 67.89, 53.9, 62.41]