first commit

This commit is contained in:
2024-09-05 12:45:40 +08:00
commit e148adef80
98 changed files with 2808 additions and 0 deletions

55
Lab/Lab3/code/test.py Normal file
View File

@@ -0,0 +1,55 @@
import scipy.io.wavfile as wav
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import ipdb;
def load_mono_normalized(filename):
    """Read a WAV file and return ``(sample_rate, samples)``.

    Only the first channel of a multi-channel file is kept.  Samples are
    converted to float64 *before* taking the absolute value (``np.abs`` on
    the most negative integer sample overflows in the integer dtype) and
    are then scaled so the peak magnitude is 1.  An all-zero (silent) or
    empty signal is returned unscaled instead of dividing by zero.
    """
    sample_rate, samples = wav.read(filename)
    if samples.ndim != 1:
        samples = samples[:, 0]
    samples = samples.astype(np.float64)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples = samples / peak  # normalize to [-1, 1]
    return sample_rate, samples


def frame_autocorrelation(sound_array, frame_length):
    """Return the one-sided autocorrelation of each non-overlapping frame.

    The result has shape ``(num_frames, frame_length)``; column ``k`` holds
    the (unnormalized) autocorrelation at lag ``k``.  Trailing samples that
    do not fill a whole frame are ignored.

    Raises:
        ValueError: if ``frame_length`` is smaller than 1.
    """
    if frame_length < 1:
        raise ValueError("frame_length must be at least 1 sample")
    num_frames = len(sound_array) // frame_length
    autocorrelation = np.zeros((num_frames, frame_length))
    for n in range(num_frames):
        frame = sound_array[n * frame_length:(n + 1) * frame_length]
        # mode='full' yields lags -(L-1)..(L-1); keep the non-negative half.
        autocorrelation[n, :] = signal.correlate(
            frame, frame, mode="full"
        )[frame_length - 1:]
    return autocorrelation


def mask_candidates(autocorrelation, min_lag, max_lag, fill_value=-np.inf):
    """Keep autocorrelation only for lags in ``[min_lag, max_lag)``.

    With an 80-400 Hz pitch range the pitch period (lag) lies between
    ``sample_rate / 400`` and ``sample_rate / 80``.  Picking true peaks
    (local maxima) is unreliable with so few points per frame, so every lag
    inside the range is treated as a candidate.  Lags outside the range are
    set to ``fill_value`` so they can never be chosen; ``-inf`` is used by
    default because a finite sentinel can still exceed real autocorrelation
    values, which are bounded only by +/- frame_length.
    """
    masked = np.full(autocorrelation.shape, fill_value, dtype=np.float64)
    masked[:, min_lag:max_lag] = autocorrelation[:, min_lag:max_lag]
    return masked


def viterbi_pitch_track(dist, sample_rate, min_lag, max_lag):
    """Find the minimum-cost lag sequence with dynamic programming.

    Args:
        dist: array ``(num_frames, frame_length)``; ``dist[n, k]`` is the
            local cost of choosing lag ``k`` in frame ``n`` (lower is better).
        sample_rate: sampling rate in Hz, used to convert lags to f0.
        min_lag, max_lag: candidate lags are ``range(min_lag, max_lag)``.
            ``min_lag`` is clamped to 1 (lag 0 has no finite frequency) and
            ``max_lag`` to ``frame_length``.

    Returns:
        ``(l_hat, cost, path)`` where ``l_hat`` (int32, one entry per frame)
        is the selected lag, ``cost`` the accumulated cost table (``inf``
        for non-candidate lags) and ``path`` the back-pointer table holding
        the best previous lag for every candidate.

    Raises:
        ValueError: if the candidate lag range is empty.
    """
    num_frames, frame_length = dist.shape
    l_hat = np.zeros(num_frames, dtype=np.int32)
    cost = np.full((num_frames, frame_length), np.inf)
    path = np.zeros((num_frames, frame_length), dtype=np.intp)
    if num_frames == 0:
        return l_hat, cost, path

    lo = max(int(min_lag), 1)
    hi = min(int(max_lag), frame_length)
    if hi <= lo:
        raise ValueError("empty pitch-lag candidate range")

    # f0 = sample_rate / lag; the transition cost is the jump in Hz between
    # consecutive frames.  It does not depend on the frame, so build it once.
    freqs = sample_rate / np.arange(lo, hi)
    transition = np.abs(freqs[:, None] - freqs[None, :])  # [previous, current]
    columns = np.arange(hi - lo)

    # The first frame has no predecessor: its cost is just the local cost.
    cost[0, lo:hi] = dist[0, lo:hi]
    for n in range(num_frames - 1):
        total = cost[n, lo:hi][:, None] + transition
        best_prev = np.argmin(total, axis=0)
        cost[n + 1, lo:hi] = dist[n + 1, lo:hi] + total[best_prev, columns]
        path[n + 1, lo:hi] = lo + best_prev

    # Backtrack from the cheapest final candidate.
    l_hat[num_frames - 1] = lo + np.argmin(cost[num_frames - 1, lo:hi])
    for n in range(num_frames - 2, -1, -1):
        l_hat[n] = path[n + 1, l_hat[n + 1]]
    return l_hat, cost, path


# Script entry point.  When run as a script the results are bound to the same
# module-level names as before (autocorrelation, dist, cost, path, l_hat, f0).
if __name__ == "__main__":
    # Read the audio file.
    filename = "./tang1.wav"
    sample_rate, sound_array = load_mono_normalized(filename)

    frame_length = int(sample_rate * 0.01)  # 10 ms frames
    num_frames = len(sound_array) // frame_length
    min_peak_threshold = min(sample_rate // 400, frame_length)
    max_peak_threshold = min(sample_rate // 80, frame_length)

    autocorrelation = frame_autocorrelation(sound_array, frame_length)
    autocorrelation_of_candidates = mask_candidates(
        autocorrelation, min_peak_threshold, max_peak_threshold
    )

    # Strong autocorrelation means low cost; masked lags get infinite cost.
    dist = -autocorrelation_of_candidates
    l_hat, cost, path = viterbi_pitch_track(
        dist, sample_rate, min_peak_threshold, max_peak_threshold
    )
    f0 = sample_rate / l_hat