UOMOP

Pearson Coefficient, DTW(Dynamic Time Warping) 본문

Project/Music Plagiarism Project

Pearson Coefficient, DTW(Dynamic Time Warping)

Happy PinGu 2022. 10. 25. 04:11
pip install dtw-python

# ========================= 라이브러리 호출 =========================
from scipy import stats
from random import *
import numpy as np
import pandas as pd
import librosa
import librosa.display
import soundfile as sf
import IPython.display as ipd
import math
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import seaborn as sns
import IPython

from dtaidistance import dtw
from fastdtw import fastdtw
from dtw import *
from scipy.spatial.distance import euclidean

def Devider(A, B, sr, period, mode) :
    """Cut two signals into matching, non-overlapping fixed-length windows.

    Both signals are first truncated to ``int(min(len(A), len(B)) / mode)``
    samples, then sliced into consecutive windows of ``int(period * sr)``
    samples each.  Samples left over after the last full window are dropped.

    Args:
        A: samples of the first song; any sliceable sequence with ``len()``
            (NumPy array or plain list).
        B: samples of the second song (same kind of sequence).
        sr: sampling rate shared by A and B.
        period: window length in seconds.
        mode: divisor applied to the common length; 1 keeps the whole common
            length, 2 keeps only the first half.

    Returns:
        A tuple ``(Mat_A, Mat_B)``; each is a list of windows and each window
        is a list of samples.
    """
    # len() works directly on arrays and lists, so there is no need to copy
    # the whole signal with tolist() just to measure it.
    length = int(min(len(A), len(B)) / mode)
    A = A[0:length]
    B = B[0:length]

    # Number of samples per window and number of whole windows that fit.
    window = int(period * sr)
    num_of_window = math.floor(length / (period * sr))

    # Slice each window out in one step instead of copying sample by sample.
    Mat_A = [list(A[i * window:(i + 1) * window]) for i in range(num_of_window)]
    Mat_B = [list(B[i * window:(i + 1) * window]) for i in range(num_of_window)]

    return Mat_A, Mat_B
    
def _timestamp_to_seconds(stamp) :
    """Convert one ``MM:SS`` / ``MM:SS.fff`` timestamp string to seconds."""
    minutes, sep, seconds = stamp.strip().partition(":")
    if not sep or not minutes or not seconds :
        raise ValueError("timestamp must look like 'MM:SS.f', got {!r}".format(stamp))
    return int(minutes) * 60 + float(seconds)


def extractor(A, area, sr) :
    """Return the samples of ``A`` that fall inside a time range.

    Args:
        A: sliceable sequence of audio samples.
        area: time range written as ``"MM:SS.f ~ MM:SS.f"``, e.g.
            ``"00:30.7 ~ 00:39.0"`` or ``"01:31.05 ~ 02:18.32"``.  Any number
            of fractional-second digits is accepted.
        sr: sampling rate of ``A``, used to turn seconds into sample indices.

    Returns:
        ``A[int(start * sr) : int(end * sr)]``.

    Raises:
        ValueError: if ``area`` has no ``~`` separator or a timestamp is
            malformed.
    """
    # Parse the text instead of reading fixed character positions: the
    # positional version scaled the start fraction by 1/100 but the end
    # fraction by 1/10, so "00:30.7" was read as 30.07 s.
    start_text, sep, end_text = str(area).partition("~")
    if not sep :
        raise ValueError("area must look like 'MM:SS.f ~ MM:SS.f', got {!r}".format(area))

    start = _timestamp_to_seconds(start_text)
    end = _timestamp_to_seconds(end_text)

    A_cut = A[int(start * sr) : int(end * sr)]

    return A_cut
    
def tempo_checker(input) :
    """Return the standard deviation of the signal's spectral flatness.

    Despite the name, no tempo is estimated here: the value is ``np.std`` of
    the first row of ``librosa.feature.spectral_flatness(y=input)``.

    Args:
        input: audio samples handed to librosa as ``y``.

    Returns:
        The standard deviation of the first spectral-flatness row.
    """
    flatness_row = librosa.feature.spectral_flatness(y = input)[0]
    return np.std(flatness_row)
    
def Pearson(A, B) :
    """Score how linearly correlated two equal-length sequences are.

    Args:
        A: first sequence of samples.
        B: second sequence of samples.

    Returns:
        The absolute Pearson correlation coefficient, rounded to six
        decimals and multiplied by 1000.
    """
    samples_a = np.array(A)
    samples_b = np.array(B)

    # Only the coefficient is used; the p-value is discarded.
    coefficient, _p_value = stats.pearsonr(samples_a, samples_b)

    # The sign is dropped, so strong negative correlation scores high too.
    magnitude = np.abs(coefficient)
    return round(magnitude, 6) * 1000
    
def FastDTW(A, B):
    """Return the fastdtw distance between two sequences.

    Calls ``fastdtw`` with ``euclidean`` as the point-wise distance and keeps
    only the distance; the warping path it also returns is discarded.

    Args:
        A: first sequence.
        B: second sequence.

    Returns:
        The distance value reported by ``fastdtw``.
    """
    result = fastdtw(A, B, dist = euclidean)
    total_distance, _warp_path = result
    return total_distance
    
def get_features(y, sr) :
    """Summarise an audio signal as a single row of 57 features.

    The row holds mean and variance of chroma, RMS, spectral centroid,
    spectral bandwidth, spectral roll-off, zero-crossing rate, and the
    harmonic and percussive components from ``librosa.effects.hpss``;
    then the tempo from ``librosa.beat.beat_track``; then, for each of 20
    MFCCs, its mean followed by its variance.

    Args:
        y: audio samples.
        sr: sampling rate of ``y``.

    Returns:
        A tuple ``(df, array)``: ``df`` is a one-row ``pandas.DataFrame`` with
        named columns and ``array`` is the same row as a nested list
        (``[[v0, ..., v56]]``).
    """
    # Audio is passed as y=... everywhere: the parameter is named y in all of
    # these functions, and recent librosa releases reject positional audio for
    # most of them.
    # sr is forwarded to chroma_stft as well, so chroma is computed at the
    # signal's real sampling rate like the other sr-dependent features.
    chroma_shift = librosa.feature.chroma_stft(y=y, sr=sr, n_fft=2048, hop_length=512)  # chroma
    rmse = librosa.feature.rms(y=y, frame_length=512, hop_length=512)                   # RMS energy
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)                  # spectral centroid
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)                            # spectral bandwidth
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]                  # roll-off
    zcr = librosa.feature.zero_crossing_rate(y=y, hop_length=512)                       # zero-crossing rate
    y_harm, y_perc = librosa.effects.hpss(y=y)                                          # harmonic / percussive
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)                                      # tempo
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)                                  # first 20 MFCCs

    # Interleave as mean1, var1, mean2, var2, ... so the values line up with
    # the mfccN_mean / mfccN_var column order below.  Stacking all means and
    # then all variances would put most values under the wrong column name.
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_var = np.var(mfcc, axis=1)
    mfcc_stats = np.column_stack([mfcc_mean, mfcc_var]).ravel()

    features_extracted = np.hstack([
                                    np.mean(chroma_shift),
                                    np.var(chroma_shift),
                                    np.mean(rmse),
                                    np.var(rmse),
                                    np.mean(spectral_centroids),
                                    np.var(spectral_centroids),
                                    np.mean(spec_bw),
                                    np.var(spec_bw),
                                    np.mean(spectral_rolloff),
                                    np.var(spectral_rolloff),
                                    np.mean(zcr),
                                    np.var(zcr),
                                    np.mean(y_harm),
                                    np.var(y_harm),
                                    np.mean(y_perc),
                                    np.var(y_perc),
                                    tempo,
                                    mfcc_stats
                                                            ])

    # 16 mean/var values + tempo + 20 * 2 MFCC statistics = 57.
    features = features_extracted.reshape(1, 57)

    col_name = [
        'chroma_stft_mean', 'chroma_stft_var',
        'rms_mean', 'rms_var',
        'spectral_centroid_mean', 'spectral_centroid_var',
        'spectral_bandwidth_mean', 'spectral_bandwidth_var',
        'rolloff_mean', 'rolloff_var',
        'zero_crossing_rate_mean', 'zero_crossing_rate_var',
        'harmony_mean', 'harmony_var',
        'perceptr_mean', 'perceptr_var',
        'tempo',
    ]
    for index in range(1, 21) :
        col_name.append('mfcc{}_mean'.format(index))
        col_name.append('mfcc{}_var'.format(index))

    df = pd.DataFrame(features, columns = col_name)
    array = features.tolist()

    return df, array
    
    
# Time ranges, written "MM:SS.f ~ MM:SS.f", that are passed to extractor().
# plag_N_1 / plag_N_2 appear to be the suspected matching regions of pair N
# in the first and second song respectively (only pair 1 is used below).
plag_1_1 = "00:00.0 ~ 00:07.5"
plag_1_2 = "00:00.0 ~ 00:07.5"

plag_2_1 = "00:30.7 ~ 00:39.0"
plag_2_2 = "00:42.7 ~ 00:51.0"

plag_3_1 = "00:39.5 ~ 00:59.5"
plag_3_2 = "02:22.3 ~ 02:42.3"

#plag_4_1 = "00:39.5 ~ 00:59.5"
#plag_4_2 = "02:22.3 ~ 02:42.3"
# Rap songs will probably need a different processing step later.

# NOTE(review): plag_5_1 ends (00:59.5) before it starts (01:09.7), so slicing
# with it yields an empty segment -- confirm the intended timestamps.
plag_5_1 = "01:09.7 ~ 00:59.5"
plag_5_2 = "01:20.0 ~ 01:25.4"

plag_6_1 = "00:12.3 ~ 00:25.5"
plag_6_2 = "00:41.9 ~ 00:54.3"

# NOTE(review): pair 7 has the same values as pair 3 -- possibly a placeholder.
plag_7_1 = "00:39.5 ~ 00:59.5"
plag_7_2 = "02:22.3 ~ 02:42.3"

# Seven 7.5-second comparison regions; they are cut from the second song below
# and scored against the suspected region of the first song.
random_area_1 = "00:12.0 ~ 00:19.5"
random_area_2 = "00:21.0 ~ 00:28.5"
random_area_3 = "00:39.0 ~ 00:46.5"
random_area_4 = "00:57.0 ~ 01:04.5"
random_area_5 = "01:10.0 ~ 01:17.5"
random_area_6 = "01:19.0 ~ 01:26.5"
random_area_7 = "02:00.0 ~ 02:07.5"

# Load the two songs ('1-1.wav' as A, '1-2.wav' as B) at five sampling rates.
# Only the 22050 Hz versions are used in the code visible below.
A_22050, sr_22050 = librosa.load('1-1.wav', sr = 22050)
B_22050, sr_22050 = librosa.load('1-2.wav', sr = 22050)

A_16000, sr_16000 = librosa.load('1-1.wav', sr = 16000)
B_16000, sr_16000 = librosa.load('1-2.wav', sr = 16000)

A_8000, sr_8000 = librosa.load('1-1.wav', sr = 8000)
B_8000, sr_8000 = librosa.load('1-2.wav', sr = 8000)

A_4000, sr_4000 = librosa.load('1-1.wav', sr = 4000)
B_4000, sr_4000 = librosa.load('1-2.wav', sr = 4000)

A_2000, sr_2000 = librosa.load('1-1.wav', sr = 2000)
B_2000, sr_2000 = librosa.load('1-2.wav', sr = 2000)


# Region pair under test: pair 1 for both songs.
A_plag_area = plag_1_1
B_plag_area = plag_1_2 

# Overwrites the value bound by the loads above; presumably redundant, since
# the same rate was requested there -- TODO confirm.
sr_22050 = 22050

# Suspected matching segments of A and B.
A_ext_22050 = np.array(extractor(A_22050, A_plag_area, sr = sr_22050))
B_ext_22050 = np.array(extractor(B_22050, B_plag_area, sr = sr_22050))

# Comparison segments taken from B at the seven random_area_* ranges.
B_rand_22050_1 = np.array(extractor(B_22050, random_area_1, sr = sr_22050))
B_rand_22050_2 = np.array(extractor(B_22050, random_area_2, sr = sr_22050))
B_rand_22050_3 = np.array(extractor(B_22050, random_area_3, sr = sr_22050))
B_rand_22050_4 = np.array(extractor(B_22050, random_area_4, sr = sr_22050))
B_rand_22050_5 = np.array(extractor(B_22050, random_area_5, sr = sr_22050))
B_rand_22050_6 = np.array(extractor(B_22050, random_area_6, sr = sr_22050))
B_rand_22050_7 = np.array(extractor(B_22050, random_area_7, sr = sr_22050))

# Pearson score between the suspected matching segments of A and B.
score_pearson_correct = Pearson(A_ext_22050, B_ext_22050)

# Pearson scores between A's suspected segment and each comparison segment of B.
score_pearson_wrong1 = Pearson(A_ext_22050, B_rand_22050_1)
score_pearson_wrong2 = Pearson(A_ext_22050, B_rand_22050_2)
score_pearson_wrong3 = Pearson(A_ext_22050, B_rand_22050_3)
score_pearson_wrong4 = Pearson(A_ext_22050, B_rand_22050_4)
score_pearson_wrong5 = Pearson(A_ext_22050, B_rand_22050_5)
score_pearson_wrong6 = Pearson(A_ext_22050, B_rand_22050_6)
score_pearson_wrong7 = Pearson(A_ext_22050, B_rand_22050_7)

# Report: first line is the score of the similar region, then the seven
# random-region scores (labels are printed in Korean).
print("유사영역의 Pearson Coefficient값 : {}".format(score_pearson_correct))
print("=========================================")
print("랜덤영역의 Pearson Coefficient값 : {}".format(score_pearson_wrong1))
print("                                 : {}".format(score_pearson_wrong2))
print("                                 : {}".format(score_pearson_wrong3))
print("                                 : {}".format(score_pearson_wrong4))
print("                                 : {}".format(score_pearson_wrong5))
print("                                 : {}".format(score_pearson_wrong6))
print("                                 : {}".format(score_pearson_wrong7))

Comments