first commit

2024-09-05 12:45:40 +08:00
commit e148adef80
98 changed files with 2808 additions and 0 deletions
--- a/Lab/Lab3/code/test.py
+++ b/Lab/Lab3/code/test.py
@@ -0,0 +1,55 @@
+import scipy.io.wavfile as wav
+from scipy import signal
+import numpy as np
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import ipdb;
+
+# 读取音频文件
+filename = "./tang1.wav"
+sample_rate, sound_array = wav.read(filename)
+sound_array = sound_array.T[0, :] if sound_array.ndim != 1 else sound_array
+sound_array = sound_array / np.max(np.abs(sound_array))  # 归一化
+
+frame_length = int(sample_rate * 0.01)
+num_frames = len(sound_array) // frame_length
+autocorrelation = np.zeros((num_frames, frame_length))
+autocorrelation_of_candidates = np.zeros((num_frames, frame_length))
+min_peak_threshold = min(sample_rate // 400, frame_length)
+max_peak_threshold = min(sample_rate // 80, frame_length)
+for n in range(num_frames):
+    frame = sound_array[n * frame_length: (n + 1) * frame_length]
+    autocorrelation[n, :] = signal.correlate(frame, frame, mode='full')[frame_length - 1:]
+    # 基频阈值为80-400Hz，则基音周期（即延迟）t最小为sample_rate/400，最大为sample_rate/80
+    
+    # 本应该使用峰值的延迟作为基音周期的候选值，但是发现峰值（局部最大值）并不好判断，同时一帧内的点数不多，因此将阈值内的所有点都作为候选点
+    # 那么将不在阈值内的自相关系数置为一个非常小的数，从而不让算法选择不在阈值内的基音周期
+    autocorrelation_of_candidates[n, :] = np.pad(
+        autocorrelation[n, min_peak_threshold : max_peak_threshold], 
+        (min_peak_threshold, max(frame_length - max_peak_threshold, 0)),
+        mode='constant', 
+        constant_values=-30.0,
+    )
+
+dist = -autocorrelation
+cost = np.zeros((num_frames, frame_length))
+path = np.zeros((num_frames, frame_length))
+
+for n in range(num_frames - 1):
+    for j in range(min_peak_threshold, max_peak_threshold):
+        # f0 = sample_rate / candidate
+        cost[n + 1, j] = dist[n + 1, j] + np.min(
+            cost[n, :] + np.abs(sample_rate / np.arange(frame_length) - sample_rate / j)
+        )
+        path[n + 1, j] = np.argmin(
+            cost[n, :] + np.abs(sample_rate / np.arange(frame_length) - sample_rate / j)
+        )
+
+l_hat = np.zeros(num_frames, dtype=np.int32)
+l_hat[num_frames - 1] = np.argmin(cost[num_frames - 1, :])
+
+for n in range(num_frames - 2, -1, -1):
+    l_hat[n] = path[n + 1, l_hat[n + 1]]
+
+f0 = sample_rate / l_hat
+