UOMOP

[Plagiarism Scoring using DTW with Librosa's Chroma] 본문

Project/Music Plagiarism Project

[Plagiarism Scoring using DTW with Librosa's Chroma]

Happy PinGu 2022. 11. 4. 20:59
def extractor(A, area, sr):
    """Return the slice of ``A`` covered by a ``"MM:SS.d ~ MM:SS.d"`` time range.

    Args:
        A: Sliceable sequence of samples (for example the array returned by
            an audio loader); it is only indexed with ``A[start:end]``.
        area: Time range such as ``"01:31.5 ~ 02:18.3"`` (minutes, seconds and
            an optional single tenth-of-a-second digit). Whitespace around
            ``~`` is optional. The value is passed through ``str()`` first.
        sr: Number of samples per second.

    Returns:
        ``A[start_sample:end_sample]`` where both indices are the range
        boundaries converted to sample positions (rounded down).

    Raises:
        ValueError: if ``area`` does not follow the expected format.
    """
    import re  # kept local so the function does not rely on module-level imports

    match = re.fullmatch(
        r"\s*(\d+):(\d{2})(?:\.(\d))?\s*~\s*(\d+):(\d{2})(?:\.(\d))?\s*",
        str(area),
    )
    if match is None:
        raise ValueError(
            "area must look like 'MM:SS.d ~ MM:SS.d', got {!r}".format(area)
        )

    start_min, start_sec, start_tenth, end_min, end_sec, end_tenth = match.groups()

    # Work in integer tenths of a second: multiplying a float such as 138.3 by
    # the sample rate and truncating can land one sample short of the boundary.
    start_tenths = (int(start_min) * 60 + int(start_sec)) * 10 + int(start_tenth or 0)
    end_tenths = (int(end_min) * 60 + int(end_sec)) * 10 + int(end_tenth or 0)

    start_sample = int(start_tenths * sr // 10)
    end_sample = int(end_tenths * sr // 10)

    return A[start_sample:end_sample]
    
    
    
    
def _area_to_tenths(area):
    """Parse a ``"MM:SS.d ~ MM:SS.d"`` range into integer tenths of a second.

    Returns:
        ``(start_tenths, end_tenths)``.

    Raises:
        ValueError: if ``area`` is malformed or ends before it starts.
    """
    import re  # kept local so the helper does not rely on module-level imports

    match = re.fullmatch(
        r"\s*(\d+):(\d{2})(?:\.(\d))?\s*~\s*(\d+):(\d{2})(?:\.(\d))?\s*",
        str(area),
    )
    if match is None:
        raise ValueError(
            "area must look like 'MM:SS.d ~ MM:SS.d', got {!r}".format(area)
        )

    start_min, start_sec, start_tenth, end_min, end_sec, end_tenth = match.groups()
    start_tenths = (int(start_min) * 60 + int(start_sec)) * 10 + int(start_tenth or 0)
    end_tenths = (int(end_min) * 60 + int(end_sec)) * 10 + int(end_tenth or 0)

    if end_tenths < start_tenths:
        raise ValueError("area ends before it starts: {!r}".format(area))

    return start_tenths, end_tenths


def _pick_random_offsets(label, song, plag_area, sr, num_rand):
    """Draw ``num_rand`` start offsets (whole seconds) away from ``plag_area``.

    Returns:
        ``(offsets, window_sec)`` where ``window_sec`` is the length of the
        marked area truncated to whole seconds.

    Raises:
        ValueError: if the song is too short to draw ``num_rand`` offsets, or
            if a draw must be replaced and no offset lies outside the
            exclusion zone around the marked area.
    """
    start_tenths, end_tenths = _area_to_tenths(plag_area)

    # Integer arithmetic: a float difference such as 20.3 - 10.3 can come out
    # just below 10.0 and would then be truncated to 9 seconds.
    start_sec = start_tenths // 10
    window_sec = (end_tenths - start_tenths) // 10
    song_sec = int(len(song) / sr)

    # Latest start that still leaves room for a full window inside the song.
    rand_range = song_sec - window_sec

    offsets = random.sample(range(0, rand_range), num_rand)
    print("{}_rand_saver Before : {}".format(label, offsets))

    # An offset is rejected when it starts within half a window of the marked
    # area. Rejected draws are replaced by a uniform pick from the offsets
    # that pass the test, which cannot spin forever when none exist.
    exclusion = int(window_sec / 2)
    allowed = [t for t in range(0, rand_range) if abs(t - start_sec) > exclusion]

    for i, offset in enumerate(offsets):
        if abs(offset - start_sec) <= exclusion:
            if not allowed:
                raise ValueError(
                    "{}: no start offset lies outside the marked area {!r}".format(
                        label, plag_area
                    )
                )
            offsets[i] = random.choice(allowed)

    print("{}_rand_saver After  : {}".format(label, offsets))

    return offsets, window_sec


def _dtw_row(reference_chroma, other_chroma, n_bins):
    """Return the per-bin ``FastDTW`` results between two chroma matrices."""
    # NOTE(review): FastDTW is defined outside this block; it is assumed to
    # return one scalar distance per call - confirm against its definition.
    return [FastDTW(reference_chroma[k], other_chroma[k]) for k in range(n_bins)]


def chroma_score(song1, song1_plag_area, song2, song2_plag_area, sr, num_rand) :
    """Score how similar two marked song areas are, relative to random segments.

    The chroma features of the two marked areas are compared bin by bin with
    ``FastDTW``. The same comparison is then made between each marked area and
    ``num_rand`` randomly placed segments of the *other* song. For every
    chroma bin the marked pair is ranked among all comparisons (rank 1 is the
    smallest distance), and the ranks are turned into a 0-100 score.

    Args:
        song1: Samples of the first song (sliced and passed to librosa).
        song1_plag_area: ``"MM:SS.d ~ MM:SS.d"`` range marked in ``song1``.
        song2: Samples of the second song.
        song2_plag_area: ``"MM:SS.d ~ MM:SS.d"`` range marked in ``song2``.
        sr: Samples per second; must be an integer because it is used to
            build slice indices.
        num_rand: Number of random segments drawn from each song.

    Returns:
        ``(save_df, score)``. ``save_df`` holds the distances rounded to two
        decimals, with one column per chroma bin labelled ``C`` .. ``B``
        (assumes librosa's default bin order starts at C - confirm) and
        ``1 + 2 * num_rand`` rows: row 0 is area vs. area, the next
        ``num_rand`` rows are the ``song1`` area vs. random ``song2``
        segments, and the last ``num_rand`` rows are the ``song2`` area vs.
        random ``song1`` segments. ``score`` is 100 when the marked pair has
        the smallest distance in every bin and 0 when it has the largest.

    Raises:
        ValueError: if an area string is malformed, or if random offsets
            cannot be drawn for a song (see ``_pick_random_offsets``).
    """
    # TODO: add several modes later - "random" compares against random
    # segments, "window" would compare against periodically placed windows.

    col_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    n_bins = len(col_names)

    song1_ext = np.array(extractor(song1, song1_plag_area, sr = sr))
    song2_ext = np.array(extractor(song2, song2_plag_area, sr = sr))

    song1_chroma = librosa.feature.chroma_stft(y = song1_ext, sr = sr)
    song2_chroma = librosa.feature.chroma_stft(y = song2_ext, sr = sr)

    # ---- random start offsets (whole seconds) for both songs ----
    song1_rand_saver, song1_len_plag = _pick_random_offsets(
        "song1", song1, song1_plag_area, sr, num_rand
    )
    song2_rand_saver, song2_len_plag = _pick_random_offsets(
        "song2", song2, song2_plag_area, sr, num_rand
    )

    print("song1'의 랜덤한 시간(초) {}개 : {}".format(num_rand, song1_rand_saver))
    print("song2'의 랜덤한 시간(초) {}개 : {}".format(num_rand, song2_rand_saver))

    # ---- cut the random segments out of each song ----
    song1_rand_data = [
        song1[t * sr : (t + song1_len_plag) * sr] for t in song1_rand_saver
    ]
    song2_rand_data = [
        song2[t * sr : (t + song2_len_plag) * sr] for t in song2_rand_saver
    ]

    print("song1 rand data's shape : {}".format(len(song1_rand_data)))
    print("song2 rand data's shape : {}".format(len(song2_rand_data)))

    # ---- chroma features of every random segment ----
    song1_rand_chroma = np.array(
        [librosa.feature.chroma_stft(y = segment, sr = sr) for segment in song1_rand_data]
    )
    song2_rand_chroma = np.array(
        [librosa.feature.chroma_stft(y = segment, sr = sr) for segment in song2_rand_data]
    )

    print("song1 chroma data's shape : {}".format(song1_rand_chroma.shape))
    print("song2 chroma data's shape : {}".format(song2_rand_chroma.shape))

    # ---- compare chroma data ----
    rows = [_dtw_row(song1_chroma, song2_chroma, n_bins)]
    rows.extend(_dtw_row(song1_chroma, chroma, n_bins) for chroma in song2_rand_chroma)
    rows.extend(_dtw_row(song2_chroma, chroma, n_bins) for chroma in song1_rand_chroma)

    save_df = pd.DataFrame(np.array(rows), columns = col_names).round(2)

    # Rank of the marked pair (row 0) in each bin: one more than the number of
    # rows with a strictly smaller distance, so ties never count against it
    # and the result does not depend on any previous sort order.
    rank_sum = 0
    for note in col_names :
        column = save_df[note]
        rank_sum += int((column < column.iloc[0]).sum()) + 1

    # Map the rank sum linearly onto 0..100. The best case is rank 1 in every
    # bin (sum == n_bins) and the worst is last place in every bin
    # (sum == n_bins * n_rows), so the scale follows num_rand.
    n_rows = len(save_df)
    if n_rows > 1 :
        score = 100 - ((rank_sum - n_bins) / (n_bins * (n_rows - 1)) * 100)
    else :
        score = 100.0  # nothing to rank against

    print("=====================")
    print("Chroma Score : {:.2f}점".format(score))
    print("=====================")

    return save_df, score
# Load both recordings, resampled to 22050 Hz.
(song1, sr), (song2, sr) = [
    librosa.load(path, sr = 22050) for path in ("5-1.wav", "5-2.wav")
]

# Candidate time ranges, written as "MM:SS.d ~ MM:SS.d".
# Judging by the pair selected below, plag_N_1 refers to song1 and
# plag_N_2 to song2.
plag_1_1, plag_1_2 = "00:00.0 ~ 00:07.5", "00:00.0 ~ 00:07.5"
plag_2_1, plag_2_2 = "00:30.7 ~ 00:39.0", "00:42.7 ~ 00:51.0"
plag_3_1, plag_3_2 = "00:39.5 ~ 00:52.5", "02:22.3 ~ 02:35.3"

# Pair 4 is disabled: rap songs will probably need a different process later.
#plag_4_1 = "00:39.5 ~ 00:59.5"
#plag_4_2 = "02:22.3 ~ 02:42.3"

plag_5_1, plag_5_2 = "01:09.2 ~ 01:15.5", "01:03.5 ~ 01:10.0"
plag_6_1, plag_6_2 = "00:12.3 ~ 00:25.5", "00:41.9 ~ 00:54.3"
plag_7_1, plag_7_2 = "00:39.5 ~ 00:59.5", "02:22.3 ~ 02:42.3"

# Pair under test in this run.
song1_plag_area, song2_plag_area = plag_5_1, plag_5_2

df, score = chroma_score(
    song1 = song1,
    song1_plag_area = song1_plag_area,
    song2 = song2,
    song2_plag_area = song2_plag_area,
    sr = sr,
    num_rand = 7,
)
df.head(30)

표절 논란이 있는 두 곡의 구간을 chroma_score 함수에 넣었더니 약 70점이 나왔다.
연관이 없는 노래도 넣어 보고 함수를 재확인함.

점수가 낮게 측정된다.

num_rand 파라미터를 7로 할 때가 이상적.

 

 

 

 

 

Comments