%load_ext autoreload
%autoreload 2
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
from scipy.ndimage import maximum_filter
from spectrogramtools import *
import scipy.io as sio
from scipy.spatial import KDTree
import librosa
# Decode the clip into a sample array `x` and its sample rate `sr`.
audio_path = "Despacito.avi"
x, sr = librosa.load(audio_path)
/home/ctralie/anaconda3/lib/python3.8/site-packages/librosa/core/audio.py:165: UserWarning: PySoundFile failed. Trying audioread instead. warnings.warn("PySoundFile failed. Trying audioread instead.")
print(sr)

# STFT analysis parameters and the number of lowest rows of the spectrogram
# that are kept for peak picking.
win_length = 2048
hop_length = 1024
max_freq = 256

# Magnitude spectrogram. Rows are treated as frequency bins and columns as
# time frames everywhere below (rows are cropped to max_freq and drawn on the
# y axis). STFT is presumably provided by the spectrogramtools star import.
S = STFT(x, win_length, hop_length, useLibrosa=False)
S = np.abs(S)
orig_shape = S.shape[0]  # row count before cropping, used to pad the mask back
S = S[0:max_freq, :]

# Half-widths of the neighbourhood a bin must dominate to count as a peak.
time_win = 8
freq_win = 5

# `size` follows the array's axis order, i.e. (rows = frequency, cols = time),
# so the frequency extent has to come first. Passing (time, freq) would apply
# the time half-width along frequency and vice versa.
Maxes = maximum_filter(S, size=(2*freq_win + 1, 2*time_win + 1))
SM = S == Maxes  # boolean mask: True where a bin equals its local maximum

# Peak coordinates in row-major order. These are deliberately NOT called
# x / y: `x` holds the audio samples fed to STFT above, and overwriting it
# would break this cell when it is executed a second time.
peak_freqs, peak_times = np.nonzero(SM)
X = np.column_stack((peak_times, peak_freqs))  # one (time, freq) row per peak

plt.figure(figsize=(10, 7))
# Top: spectrogram with the detected peaks overlaid.
plt.subplot(211)
plt.imshow(S, aspect='auto', cmap='magma_r')
plt.scatter(peak_times, peak_freqs)
plt.gca().invert_yaxis()  # put row 0 at the bottom
# Bottom: the raw peak mask.
plt.subplot(212)
plt.imshow(SM, aspect='auto')
plt.gca().invert_yaxis()
plt.tight_layout()
22050
# Embed the cropped peak mask in a zero array that has the spectrogram's
# original number of rows, then hand it to griffinLimInverse and play the
# returned signal.
kept_rows, n_frames = S.shape
Peaks = np.zeros((orig_shape, n_frames))
Peaks[:kept_rows] = SM
y = griffinLimInverse(Peaks, win_length, hop_length, useLibrosa=False)
# Must stay the last expression so the notebook renders the audio widget.
ipd.Audio(y, rate=sr)
Iteration 1 of 10 Iteration 2 of 10 Iteration 3 of 10 Iteration 4 of 10 Iteration 5 of 10 Iteration 6 of 10 Iteration 7 of 10 Iteration 8 of 10 Iteration 9 of 10 Iteration 10 of 10
# Re-analyse the resynthesised signal `y` with the same STFT settings and
# display the magnitudes of its lowest `max_freq` rows.
S = np.abs(STFT(y, win_length, hop_length, useLibrosa=False))
orig_shape = S.shape[0]  # row count before cropping
S = S[:max_freq]

plt.figure(figsize=(10, 2))
plt.imshow(S, aspect='auto')
<matplotlib.image.AxesImage at 0x7fb9b10f5c40>