commit 8fee98d39d12a0556cb48c9e75d22dfc619acf32 Author: kejingfan Date: Thu Sep 5 12:56:46 2024 +0800 first commit diff --git a/Courseware/第1章 绪论-第1讲.pdf b/Courseware/第1章 绪论-第1讲.pdf new file mode 100644 index 0000000..cf5a36b Binary files /dev/null and b/Courseware/第1章 绪论-第1讲.pdf differ diff --git a/Courseware/第3章 正交变换-第1讲.pdf b/Courseware/第3章 正交变换-第1讲.pdf new file mode 100644 index 0000000..371329c Binary files /dev/null and b/Courseware/第3章 正交变换-第1讲.pdf differ diff --git a/Courseware/第3章 正交变换-第2讲.pdf b/Courseware/第3章 正交变换-第2讲.pdf new file mode 100644 index 0000000..82188ee Binary files /dev/null and b/Courseware/第3章 正交变换-第2讲.pdf differ diff --git a/Courseware/第3章 正交变换-第3讲.pdf b/Courseware/第3章 正交变换-第3讲.pdf new file mode 100644 index 0000000..4077995 Binary files /dev/null and b/Courseware/第3章 正交变换-第3讲.pdf differ diff --git a/Courseware/第4章 图像增强-第1讲.pdf b/Courseware/第4章 图像增强-第1讲.pdf new file mode 100644 index 0000000..389175f Binary files /dev/null and b/Courseware/第4章 图像增强-第1讲.pdf differ diff --git a/Courseware/第4章 图像增强-第2讲.pdf b/Courseware/第4章 图像增强-第2讲.pdf new file mode 100644 index 0000000..3cb12cb Binary files /dev/null and b/Courseware/第4章 图像增强-第2讲.pdf differ diff --git a/Courseware/第4章 图像增强-第3讲.pdf b/Courseware/第4章 图像增强-第3讲.pdf new file mode 100644 index 0000000..a31b4dd Binary files /dev/null and b/Courseware/第4章 图像增强-第3讲.pdf differ diff --git a/Courseware/第5章 图像编码-第1讲.pdf b/Courseware/第5章 图像编码-第1讲.pdf new file mode 100644 index 0000000..b705fa5 Binary files /dev/null and b/Courseware/第5章 图像编码-第1讲.pdf differ diff --git a/Courseware/第5章 图像编码-第2讲.pdf b/Courseware/第5章 图像编码-第2讲.pdf new file mode 100644 index 0000000..f39226f Binary files /dev/null and b/Courseware/第5章 图像编码-第2讲.pdf differ diff --git a/Courseware/第5章 图像编码-第3讲.pdf b/Courseware/第5章 图像编码-第3讲.pdf new file mode 100644 index 0000000..16f21c7 Binary files /dev/null and b/Courseware/第5章 图像编码-第3讲.pdf differ diff --git a/Courseware/第6章 图像复原-第1讲.pdf b/Courseware/第6章 图像复原-第1讲.pdf new file mode 100644 index 
0000000..fd03d39 Binary files /dev/null and b/Courseware/第6章 图像复原-第1讲.pdf differ diff --git a/Courseware/第8章 图像分析-第1讲.pdf b/Courseware/第8章 图像分析-第1讲.pdf new file mode 100644 index 0000000..75de7f1 Binary files /dev/null and b/Courseware/第8章 图像分析-第1讲.pdf differ diff --git a/Lab/Lab1/source/Lenna-二值化.png b/Lab/Lab1/source/Lenna-二值化.png new file mode 100644 index 0000000..cfb2325 Binary files /dev/null and b/Lab/Lab1/source/Lenna-二值化.png differ diff --git a/Lab/Lab1/source/Lenna-伪彩色-医学.png b/Lab/Lab1/source/Lenna-伪彩色-医学.png new file mode 100644 index 0000000..c0bfdd5 Binary files /dev/null and b/Lab/Lab1/source/Lenna-伪彩色-医学.png differ diff --git a/Lab/Lab1/source/Lenna-伪彩色-遥感.png b/Lab/Lab1/source/Lenna-伪彩色-遥感.png new file mode 100644 index 0000000..cf51eb5 Binary files /dev/null and b/Lab/Lab1/source/Lenna-伪彩色-遥感.png differ diff --git a/Lab/Lab1/source/Lenna-傅里叶变换.png b/Lab/Lab1/source/Lenna-傅里叶变换.png new file mode 100644 index 0000000..7757f52 Binary files /dev/null and b/Lab/Lab1/source/Lenna-傅里叶变换.png differ diff --git a/Lab/Lab1/source/Lenna-加椒盐-中值滤波.png b/Lab/Lab1/source/Lenna-加椒盐-中值滤波.png new file mode 100644 index 0000000..03dd650 Binary files /dev/null and b/Lab/Lab1/source/Lenna-加椒盐-中值滤波.png differ diff --git a/Lab/Lab1/source/Lenna-加椒盐-均值滤波.png b/Lab/Lab1/source/Lenna-加椒盐-均值滤波.png new file mode 100644 index 0000000..2b00d73 Binary files /dev/null and b/Lab/Lab1/source/Lenna-加椒盐-均值滤波.png differ diff --git a/Lab/Lab1/source/Lenna-加高斯-中值滤波.png b/Lab/Lab1/source/Lenna-加高斯-中值滤波.png new file mode 100644 index 0000000..866192e Binary files /dev/null and b/Lab/Lab1/source/Lenna-加高斯-中值滤波.png differ diff --git a/Lab/Lab1/source/Lenna-加高斯-均值滤波.png b/Lab/Lab1/source/Lenna-加高斯-均值滤波.png new file mode 100644 index 0000000..4ad0789 Binary files /dev/null and b/Lab/Lab1/source/Lenna-加高斯-均值滤波.png differ diff --git a/Lab/Lab1/source/Lenna-垂直翻转.png b/Lab/Lab1/source/Lenna-垂直翻转.png new file mode 100644 index 0000000..164538e Binary files /dev/null and 
b/Lab/Lab1/source/Lenna-垂直翻转.png differ diff --git a/Lab/Lab1/source/Lenna-对角翻转.png b/Lab/Lab1/source/Lenna-对角翻转.png new file mode 100644 index 0000000..5c3e929 Binary files /dev/null and b/Lab/Lab1/source/Lenna-对角翻转.png differ diff --git a/Lab/Lab1/source/Lenna-水平翻转.png b/Lab/Lab1/source/Lenna-水平翻转.png new file mode 100644 index 0000000..68eee8e Binary files /dev/null and b/Lab/Lab1/source/Lenna-水平翻转.png differ diff --git a/Lab/Lab1/source/Lenna-沃尔什变换.png b/Lab/Lab1/source/Lenna-沃尔什变换.png new file mode 100644 index 0000000..1ccef97 Binary files /dev/null and b/Lab/Lab1/source/Lenna-沃尔什变换.png differ diff --git a/Lab/Lab1/source/Lenna-灰度反转.png b/Lab/Lab1/source/Lenna-灰度反转.png new file mode 100644 index 0000000..83c9af9 Binary files /dev/null and b/Lab/Lab1/source/Lenna-灰度反转.png differ diff --git a/Lab/Lab1/source/Lenna-离散余弦变换.png b/Lab/Lab1/source/Lenna-离散余弦变换.png new file mode 100644 index 0000000..848c807 Binary files /dev/null and b/Lab/Lab1/source/Lenna-离散余弦变换.png differ diff --git a/Lab/Lab1/source/Lenna-边缘提取-LoG.png b/Lab/Lab1/source/Lenna-边缘提取-LoG.png new file mode 100644 index 0000000..bf2290a Binary files /dev/null and b/Lab/Lab1/source/Lenna-边缘提取-LoG.png differ diff --git a/Lab/Lab1/source/Lenna-边缘提取-Prewitte.png b/Lab/Lab1/source/Lenna-边缘提取-Prewitte.png new file mode 100644 index 0000000..f5793e0 Binary files /dev/null and b/Lab/Lab1/source/Lenna-边缘提取-Prewitte.png differ diff --git a/Lab/Lab1/source/Lenna-边缘提取-Roberts.png b/Lab/Lab1/source/Lenna-边缘提取-Roberts.png new file mode 100644 index 0000000..96e770b Binary files /dev/null and b/Lab/Lab1/source/Lenna-边缘提取-Roberts.png differ diff --git a/Lab/Lab1/source/Lenna-边缘提取-Sobel.png b/Lab/Lab1/source/Lenna-边缘提取-Sobel.png new file mode 100644 index 0000000..aa87a28 Binary files /dev/null and b/Lab/Lab1/source/Lenna-边缘提取-Sobel.png differ diff --git a/Lab/Lab1/source/Miss.bmp b/Lab/Lab1/source/Miss.bmp new file mode 100644 index 0000000..68dc9b2 Binary files /dev/null and b/Lab/Lab1/source/Miss.bmp differ diff 
--git a/Lab/Lab1/source/my_image_dark.bmp b/Lab/Lab1/source/my_image_dark.bmp new file mode 100644 index 0000000..4024fb0 Binary files /dev/null and b/Lab/Lab1/source/my_image_dark.bmp differ diff --git a/Lab/Lab1/source/my_image_light.bmp b/Lab/Lab1/source/my_image_light.bmp new file mode 100644 index 0000000..f6509ca Binary files /dev/null and b/Lab/Lab1/source/my_image_light.bmp differ diff --git a/Lab/Lab1/source/my_image_normal.bmp b/Lab/Lab1/source/my_image_normal.bmp new file mode 100644 index 0000000..04a62fa Binary files /dev/null and b/Lab/Lab1/source/my_image_normal.bmp differ diff --git a/Lab/Lab1/source/偏亮-直方图均衡化.png b/Lab/Lab1/source/偏亮-直方图均衡化.png new file mode 100644 index 0000000..647afc3 Binary files /dev/null and b/Lab/Lab1/source/偏亮-直方图均衡化.png differ diff --git a/Lab/Lab1/source/偏亮-直方图统计.png b/Lab/Lab1/source/偏亮-直方图统计.png new file mode 100644 index 0000000..53aecf8 Binary files /dev/null and b/Lab/Lab1/source/偏亮-直方图统计.png differ diff --git a/Lab/Lab1/source/偏暗-直方图均衡化.png b/Lab/Lab1/source/偏暗-直方图均衡化.png new file mode 100644 index 0000000..1deb85a Binary files /dev/null and b/Lab/Lab1/source/偏暗-直方图均衡化.png differ diff --git a/Lab/Lab1/source/偏暗-直方图统计.png b/Lab/Lab1/source/偏暗-直方图统计.png new file mode 100644 index 0000000..e64f357 Binary files /dev/null and b/Lab/Lab1/source/偏暗-直方图统计.png differ diff --git a/Lab/Lab1/source/实验1-柯劲帆-21281280.md b/Lab/Lab1/source/实验1-柯劲帆-21281280.md new file mode 100644 index 0000000..fbb5c9d --- /dev/null +++ b/Lab/Lab1/source/实验1-柯劲帆-21281280.md @@ -0,0 +1,241 @@ +

实验报告

+ +
+
课程名称:数字图像处理
+
实验题目:实验一
+
学号:21281280
+
姓名:柯劲帆
+
班级:物联网2101班
+
指导老师:安高云
+
报告日期:2024年1月10日
+
+ + + +--- + + + +**目录** + +[TOC] + +--- + + + +# 1. 几何变化 + + + + + + + + + + + + + + + + + +# 2. 灰度反转 + +
原图水平翻转垂直翻转对角翻转
MissLenna-水平翻转Lenna-垂直翻转Lenna-对角翻转
+ + + + + + + + + +即将图片的灰度$x, x\in [0, 255]$转变成$255 - x$。 + + + +# 3. 直方图均衡化 + +这里准备手机拍摄的3张图片,图片内容相同,但是在拍摄的过程中调整亮度,得到3张(偏暗、正常、偏亮)的图片,直方图均衡化处理如下: + +
原图灰度反转
MissLenna-灰度反转
+ + + + + + + + + + + + + + + + + + + + + + + + +
偏暗正常偏亮
原图my_image_darkmy_image_normalmy_image_light
直方图均衡化偏暗-直方图均衡化正常-直方图均衡化偏亮-直方图均衡化
直方图统计偏暗-直方图统计正常-直方图统计偏亮-直方图统计
+ + +可见偏暗的图进行直方图均衡化处理后,明暗分布变均匀,辨识度增加; + +正常的图进行直方图均衡化处理后,结果较为接近原图,明暗分布较为均匀; + +偏亮的图进行直方图均衡化处理后,辨识度增加,但明暗分布不够均匀,不如偏暗的图直方图均衡化处理结果。 + + + +# 4. 图像复原 + + + + + + + + + + + + + + + + + + + + + +
噪声中值滤波均值滤波
原图my_image_normal
加高斯噪声Lenna-加高斯-中值滤波Lenna-加高斯-均值滤波
加椒盐噪声Lenna-加椒盐-中值滤波Lenna-加椒盐-均值滤波
+ +从复原结果来看,均值滤波更容易复原加了高斯噪声的图片;中值滤波更容易复原加了椒盐噪声的图片。 + +均值滤波可以有效滤除图像中的高斯噪声,它通过用像素点周围区域的平均值来替代该像素点的值,由于高斯噪声具有零均值的特点,所以能够有效地减弱噪声;中值滤波可以有效去除图像中的椒盐噪声,它通过用像素点周围区域的中值来替代该像素点的值,由于椒盐噪声往往处于区域灰度值的两端,使用 +中值可以将其排除在外。 + + + +# 5. 边缘提取 + + + + + + + + + + + + + + + + +
原图Sobel算子法Prewitt算子法Roberts算子法LoG算子法
Missmy_image_darkmy_image_normalmy_image_lightmy_image_light
+ +- Sobel算子是一种一阶微分算子,它利用像素邻近区域的梯度值来计算1个像素的梯度,然后根据一定的绝对值来取舍。Sobel算子包含两组3x3的矩阵,分别为横向及纵向模板,将之与图像作平面卷积,即可分别得出横向及纵向的亮度差分近似值。 +- Roberts算子是一个2x2的模板,采用的是对角方向相邻的两个像素之差。 +- Prewitt算子与Sobel算子类似,也是一个3x3的模板,但其权重分配不同。 +- LoG(Laplacian of Gaussian,高斯拉普拉斯)算子是一种二阶微分算子,它先使用高斯滤波器对图像进行平滑,然后用拉普拉斯算子计算图像的二阶导数。 + + + +# 6. 伪彩色 + + + + + + + + + + + + + +伪彩色是基于一定的规则算法为黑白图片上色。伪彩色可以帮助人眼分辨重要的区域。 + + + +# 7. 二值化 + +
原图医学伪彩色遥感伪彩色
MissLenna-伪彩色-医学Lenna-伪彩色
+ + + + + + + + + +即设定一个阈值$s, s\in[0, 255]$,对于原图像像素值$x$,有二值化后图像像素值$y$: +$$ +y = +\left\{\begin{array}{ll} + 0, & x < s \\ + 255, & x \geq s +\end{array}\right. +$$ + + +# 8. 图形变换 + +
原图二值化
MissLenna-二值化
+ + + + + + + + + + + + +
原图傅里叶变换沃尔什变换离散余弦变换
MissLenna-傅里叶变换Lenna-沃尔什变换Lenna-离散余弦变换
+ +**傅里叶变换** + +傅里叶变换是将信号从空间域转换到频域的最常用方法。傅里叶变换可以将任意函数表示为不同频率的正弦和余弦函数的叠加。在图形处理中,傅里叶变换可以用于以下应用: + +- 图像频率分析:傅里叶变换可以用于分析图像的频率成分,从而识别图像的结构和特征。 +- 图像压缩:傅里叶变换可以用于将图像从空间域压缩到频域,从而减少图像的存储空间。 +- 图像滤波:傅里叶变换可以用于设计滤波器,从而实现图像增强、锐化、去噪等操作。 + +**沃尔什变换** + +沃尔什变换是一种与傅里叶变换类似的正交变换,但沃尔什函数具有比傅里叶函数更强的能量集中性。这意味着沃尔什变换可以更有效地提取图像的低频成分。在图形处理中,沃尔什变换可以用于以下应用: + +- 图像压缩:沃尔什变换可以用于将图像从空间域压缩到频域,从而减少图像的存储空间。 +- 图像滤波:沃尔什变换可以用于设计滤波器,从而实现图像增强、锐化、去噪等操作。 + +**离散余弦变换** + +离散余弦变换是一种实数域的正交变换。离散余弦变换具有以下特点: + +- 能量集中性:离散余弦变换的系数通常集中在低频区域。 +- 计算效率高:离散余弦变换具有快速算法,计算效率高。 + +在图形处理中,离散余弦变换可以用于以下应用: + +- 图像压缩:离散余弦变换可以用于将图像从空间域压缩到频域,从而减少图像的存储空间。 +- 图像编码:离散余弦变换可以用于图像编码,从而实现图像的传输和存储。 +- 图像恢复:离散余弦变换可以用于图像恢复,从而修复图像的损坏。 diff --git a/Lab/Lab1/source/正常-直方图均衡化.png b/Lab/Lab1/source/正常-直方图均衡化.png new file mode 100644 index 0000000..576a87b Binary files /dev/null and b/Lab/Lab1/source/正常-直方图均衡化.png differ diff --git a/Lab/Lab1/source/正常-直方图统计.png b/Lab/Lab1/source/正常-直方图统计.png new file mode 100644 index 0000000..e932e33 Binary files /dev/null and b/Lab/Lab1/source/正常-直方图统计.png differ diff --git a/Lab/Lab1/test_images/Miss.bmp b/Lab/Lab1/test_images/Miss.bmp new file mode 100644 index 0000000..68dc9b2 Binary files /dev/null and b/Lab/Lab1/test_images/Miss.bmp differ diff --git a/Lab/Lab1/test_images/my_image_dark.bmp b/Lab/Lab1/test_images/my_image_dark.bmp new file mode 100644 index 0000000..4024fb0 Binary files /dev/null and b/Lab/Lab1/test_images/my_image_dark.bmp differ diff --git a/Lab/Lab1/test_images/my_image_light.bmp b/Lab/Lab1/test_images/my_image_light.bmp new file mode 100644 index 0000000..f6509ca Binary files /dev/null and b/Lab/Lab1/test_images/my_image_light.bmp differ diff --git a/Lab/Lab1/test_images/my_image_normal.bmp b/Lab/Lab1/test_images/my_image_normal.bmp new file mode 100644 index 0000000..04a62fa Binary files /dev/null and b/Lab/Lab1/test_images/my_image_normal.bmp differ diff --git a/Lab/Lab1/实验1-柯劲帆-21281280.pdf b/Lab/Lab1/实验1-柯劲帆-21281280.pdf new file mode 100644 index 0000000..9246457 Binary files 
/dev/null and b/Lab/Lab1/实验1-柯劲帆-21281280.pdf differ diff --git a/Lab/Lab2/code/read_bmp.py b/Lab/Lab2/code/read_bmp.py new file mode 100644 index 0000000..05c14cf --- /dev/null +++ b/Lab/Lab2/code/read_bmp.py @@ -0,0 +1,226 @@ +import numpy as np +from struct import unpack +from PIL import Image, ImageTk +import sys +import tkinter +import tkinter.filedialog + +class BmpData: + def __init__(self, file_path:str): + with open(file_path, "rb") as file: + self.file = file + + self.bfType = unpack(" np.ndarray: + if (self.bfOffBits == 0x36): # 16/24位图像不需要调色板,起始位置就等于0x36 + return None + color_alette_size = 2 ** int(self.biBitCount) # 多少字节调色板颜色就有2^n个 + color_palette = np.zeros((color_alette_size, 3), dtype=np.int32) + self.file.seek(0x36) + for i in range(color_alette_size): + b = unpack("B", self.file.read(1))[0] + g = unpack("B", self.file.read(1))[0] + r = unpack("B", self.file.read(1))[0] + alpha = unpack("B", self.file.read(1))[0] + color_palette[i][0] = b + color_palette[i][1] = g + color_palette[i][2] = r + return color_palette + + def get_numpy_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + img_np = np.zeros((biHeight, self.biWidth, 3), dtype=np.int32) + self.file.seek(self.bfOffBits) + for x in range(biHeight): + row_byte_count = ((self.biWidth * self.biBitCount + 31) >> 5) << 2 + row_bits = self.file.read(row_byte_count) + row_bits = ''.join(format(byte, '08b') for byte in row_bits) + for y in range(self.biWidth): + pixel_data = row_bits[y * self.biBitCount: (y + 1) * self.biBitCount] + if self.biHeight > 0: # 图像倒立 + img_np[biHeight - 1 - x][y] = self.get_RGB(pixel_data) + else: + img_np[x][y] = self.get_RGB(pixel_data) + return img_np + + def get_gray_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + gray_img = np.dot(self.img_np.reshape((biHeight * self.biWidth, 3)).astype(np.float32), + [0.299, 0.587, 0.114]).astype(np.int32) + gray_img = gray_img.reshape((biHeight, self.biWidth)) + return gray_img + + def get_RGB(self, pixel_data:str): 
+ if len(pixel_data) <= 8: + color_index = int(pixel_data, 2) + return self.color_palette[color_index] + elif len(pixel_data) == 16: + b = int(pixel_data[1:6], 2) * 8 + g = int(pixel_data[6:11], 2) * 8 + r = int(pixel_data[11:16], 2) * 8 + return [r, g, b] + elif len(pixel_data) == 24: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + return [r, g, b] + elif len(pixel_data) == 32: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + alpha = int(pixel_data[24:32], 2) + return [r, g, b] + + + def equalize(self, level:int): + biHeight = abs(self.biHeight) + self.hist = np.zeros(256, dtype=np.int32) + max_value = self.gray.max() + min_value = self.gray.min() + gap = (max_value - min_value + 1) / level + for x in range(biHeight): + for y in range(self.biWidth): + self.hist[self.gray[x, y]] += 1 + hist = np.zeros(level, dtype=np.float32) + for i in range(level): + hist[i] = np.sum(self.hist[min_value + int(i * gap) : min_value + int((i + 1) * gap)]) + hist /= biHeight * self.biWidth + for i in range(1, level): + hist[i] += hist[i - 1] + hist *= level + hist = np.around(hist) + hist /= level + hist = np.floor(hist * 255).astype(np.int32) + self.equalized_img = np.zeros_like(self.gray) + self.equalized_hist = np.zeros(256, dtype=np.int32) + for x in range(biHeight): + for y in range(self.biWidth): + self.equalized_img[x, y] = hist[int((self.gray[x, y] - min_value) / gap)] + self.equalized_hist[self.equalized_img[x, y]] += 1 + return self.equalized_img, self.hist, self.equalized_hist + + def save_equalized_img(self, save_path:str): + self.save_img(image=self.equalized_img, save_path=save_path) + + def save_img(self, image:np.ndarray, save_path:str): + with open(save_path, "wb") as file: + file.write(int(self.bfType).to_bytes(2, byteorder='little')) # 0x00 文件类型 + file.write(int(0x36 + 0x100 * 4 + self.biWidth * abs(self.biHeight)).to_bytes(4, byteorder='little')) # 0x02 文件大小 + 
file.write(int(0).to_bytes(4, byteorder='little')) # 0x06 保留,必须设置为0 + file.write(int(0x36 + 0x100 * 4).to_bytes(4, byteorder='little')) # 0x0a 从头到位图数据的偏移 + file.write(int(40).to_bytes(4, byteorder='little')) # 0x0e 信息头的大小 + file.write(int(self.biWidth).to_bytes(4, byteorder='little')) # 0x12 图像的宽度 + file.write(int(self.biHeight).to_bytes(4, byteorder='little')) # 0x16 图像的高度 + file.write(int(self.biPlanes).to_bytes(2, byteorder='little')) # 0x1a 颜色平面数 + file.write(int(8).to_bytes(2, byteorder='little')) # 0x1c 比特数/像素数 + file.write(int(self.biCompression).to_bytes(4, byteorder='little')) # 0x1e 压缩类型 + file.write(int(self.biSizeImage).to_bytes(4, byteorder='little')) # 0x22 位图数据的大小 + file.write(int(self.biXPelsPerMeter).to_bytes(4, byteorder='little')) # 0x26 水平分辨率 + file.write(int(self.biYPelsPerMeter).to_bytes(4, byteorder='little')) # 0x2a 垂直分辨率 + file.write(int(0x100 * 4).to_bytes(4, byteorder='little')) # 0x2e 位图使用的调色板中的颜色索引数 + file.write(int(0).to_bytes(4, byteorder='little')) # 0x32 对图像显示有重要影响的颜色索引数 + + for i in range(256): + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(0).to_bytes(1, byteorder='little')) + + for x in range(abs(self.biHeight)): + for y in range(self.biWidth): + if self.biHeight > 0: + file.write(int(image[self.biHeight - 1 - x][y]).to_bytes(1, byteorder='little')) + else: + file.write(int(image[x][y]).to_bytes(1, byteorder='little')) + file.write(b'0' * ((((self.biWidth * 8 + 31) >> 5) << 2) - 8 * self.biWidth)) + + file.close() + + +def choosepic(): + global path_ + path_ = tkinter.filedialog.askopenfilename(title='请选择图片文件', filetypes=[('图片', '.bmp')]) + if path_ == '': + return + img_temp = Image.open(path_).resize((int(256 * 0.8), int(256 * 0.8))) # 图片读取和加载 + img = ImageTk.PhotoImage(img_temp) + label_image1.config(image=img) + label_image1.image = img + + +def equalize(): + if path_ == '': + return + image = 
BmpData(path_) + # img = Image.fromarray(image.img_np.astype(np.uint8)) + # img.show() + + equalized_img, hist, equalized_hist = image.equalize(8) # 分别为均衡化的图/直方图/均衡化后的直方图 + equalized_img = Image.fromarray(equalized_img.astype(np.uint8)) + # equalized_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_equalized" + new_file_name = '.'.join(name_parts) + image.save_equalized_img(new_file_name) + + equalized_img = equalized_img.resize((int(256 * 0.8), int(256 * 0.8))) + equalized_img = ImageTk.PhotoImage(equalized_img) + label_image2.config(image=equalized_img) + label_image2.image = equalized_img # 处理后的图片的显示 + + +if __name__ == "__main__": + root = tkinter.Tk() + root.title('21281280柯劲帆') # 标题 + width, height = 600, 400 + width_max, height_max = root.maxsize() + s_center = '%dx%d+%d+%d' % (width, height, (width_max - width) / 2, (height_max - height) / 2) # 将页面显示在正中间 + root.geometry(s_center) + root.resizable(width=False, height=False) # 窗口不可移动 + l = tkinter.Label(root, text='实验二', width=60, height=2, fg='black', font=("微软雅黑", 16), anchor=tkinter.CENTER) + l.pack() + + label_image1 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image1.pack() + label_image1.place(x=45, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + label_image2 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image2.place(x=350, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + # 文本按钮 + Image_Input = tkinter.Button(root, text='Choose', command=choosepic) + Image_Input.place(x=105, y=300, width=80, height=30) + + # 直方图均衡化 + Fun1 = tkinter.Button(root, text='直方图均衡化', command=equalize) + Fun1.place(x=265, y=300, width=80, height=30) + + # 退出 + Quit = tkinter.Button(root, text='Quit', command=sys.exit) + Quit.place(x=415, y=300, width=80, height=30) + + end = tkinter.Label(root, text='21281280 柯劲帆', fg='silver', font=("微软雅黑", 10)) + end.place(x=215, 
y=360, width=200, height=20) + root.mainloop() \ No newline at end of file diff --git a/Lab/Lab2/read_bmp.exe b/Lab/Lab2/read_bmp.exe new file mode 100644 index 0000000..44950e0 Binary files /dev/null and b/Lab/Lab2/read_bmp.exe differ diff --git a/Lab/Lab2/source/1.1.1.1.png b/Lab/Lab2/source/1.1.1.1.png new file mode 100644 index 0000000..9c29a9d Binary files /dev/null and b/Lab/Lab2/source/1.1.1.1.png differ diff --git a/Lab/Lab2/source/1.1.1.2.png b/Lab/Lab2/source/1.1.1.2.png new file mode 100644 index 0000000..7c90e8c Binary files /dev/null and b/Lab/Lab2/source/1.1.1.2.png differ diff --git a/Lab/Lab2/source/1.1.1.3.png b/Lab/Lab2/source/1.1.1.3.png new file mode 100644 index 0000000..f6cb53b Binary files /dev/null and b/Lab/Lab2/source/1.1.1.3.png differ diff --git a/Lab/Lab2/source/3.dark.png b/Lab/Lab2/source/3.dark.png new file mode 100644 index 0000000..1dc83cb Binary files /dev/null and b/Lab/Lab2/source/3.dark.png differ diff --git a/Lab/Lab2/source/3.light.png b/Lab/Lab2/source/3.light.png new file mode 100644 index 0000000..03aeb99 Binary files /dev/null and b/Lab/Lab2/source/3.light.png differ diff --git a/Lab/Lab2/source/3.normal.png b/Lab/Lab2/source/3.normal.png new file mode 100644 index 0000000..214f3de Binary files /dev/null and b/Lab/Lab2/source/3.normal.png differ diff --git a/Lab/Lab2/source/Miss.bmp b/Lab/Lab2/source/Miss.bmp new file mode 100644 index 0000000..68dc9b2 Binary files /dev/null and b/Lab/Lab2/source/Miss.bmp differ diff --git a/Lab/Lab2/source/my_image_dark.bmp b/Lab/Lab2/source/my_image_dark.bmp new file mode 100644 index 0000000..4024fb0 Binary files /dev/null and b/Lab/Lab2/source/my_image_dark.bmp differ diff --git a/Lab/Lab2/source/my_image_dark_equalized.bmp b/Lab/Lab2/source/my_image_dark_equalized.bmp new file mode 100644 index 0000000..12e8f5f Binary files /dev/null and b/Lab/Lab2/source/my_image_dark_equalized.bmp differ diff --git a/Lab/Lab2/source/my_image_light.bmp b/Lab/Lab2/source/my_image_light.bmp new file 
mode 100644 index 0000000..f6509ca Binary files /dev/null and b/Lab/Lab2/source/my_image_light.bmp differ diff --git a/Lab/Lab2/source/my_image_light_equalized.bmp b/Lab/Lab2/source/my_image_light_equalized.bmp new file mode 100644 index 0000000..9c4beec Binary files /dev/null and b/Lab/Lab2/source/my_image_light_equalized.bmp differ diff --git a/Lab/Lab2/source/my_image_normal.bmp b/Lab/Lab2/source/my_image_normal.bmp new file mode 100644 index 0000000..04a62fa Binary files /dev/null and b/Lab/Lab2/source/my_image_normal.bmp differ diff --git a/Lab/Lab2/source/my_image_normal_equalized.bmp b/Lab/Lab2/source/my_image_normal_equalized.bmp new file mode 100644 index 0000000..d3af799 Binary files /dev/null and b/Lab/Lab2/source/my_image_normal_equalized.bmp differ diff --git a/Lab/Lab2/source/实验2-柯劲帆-21281280.md b/Lab/Lab2/source/实验2-柯劲帆-21281280.md new file mode 100644 index 0000000..96dfa0d --- /dev/null +++ b/Lab/Lab2/source/实验2-柯劲帆-21281280.md @@ -0,0 +1,701 @@ +

实验报告

+ +
+
课程名称:数字图像处理
+
实验题目:直方图均衡化处理计算机实现
+
学号:21281280
+
姓名:柯劲帆
+
班级:物联网2101班
+
指导老师:安高云
+
报告日期:2024年1月10日
+
+ + +--- + + + +**目录** + +[TOC] + +--- + + + +# 1. 直方图均衡化处理程序 + +本实验中我使用Python实现直方图均衡化处理。对于图像的读取、处理和保存,我都使用了按字节进行读写的方式,符合实验要求。 + +## 1.1. BMP格式图片的读写 + +BMP格式图片的数据分为以下部分: + +| 内容 | 大小 | +| :------------------------------- | ------ | +| bmp文件头(bmp file header) | 14字节 | +| 位图信息头(bitmap information) | 40字节 | +| 调色板(color palette) | 可选 | +| 位图数据 | | + +这里使用Lenna的BMP格式图片的十六进制码作为解读用例。 + +![Miss](Miss.bmp) + +### 1.1.1. BMP文件头内容读取 + +BMP文件头内容如下: + +| 内容 | 大小 | 偏移 | Lenna图片 | 备注 | +| ------------------------------ | ----- | ---- | ---------- | ------------------------------------------ | +| bfType 文件类型 | 2字节 | 0x00 | 0x4D42 | 字符显示就是“BM” | +| bfSize 文件大小 | 4字节 | 0x02 | 0x00010438 | | +| bfReserved1 保留 | 2字节 | 0x06 | 0x00 | 必须设置为0 | +| bfReserved2 保留 | 2字节 | 0x08 | 0x00 | 必须设置为0 | +| bfOffBits 从头到位图数据的偏移 | 4字节 | 0x0A | 0x00000436 | = 文件头大小 + 位图信息头大小 + 调色板大小 | + +Lenna图片中数据如下图(使用VS Code的Hex Editor打开): + +![1.1.1.1](1.1.1.1.png) + +因此读取代码为: + +```python +class BmpData: + def __init__(self, file_path:str): + with open(file_path, "rb") as file: + self.file = file + self.bfType = unpack(" np.ndarray: + if (self.bfOffBits == 0x36): # 16/24位图像不需要调色板,起始位置就等于0x36 + return None + color_alette_size = 2 ** int(self.biBitCount) # 多少字节调色板颜色就有2^n个 + color_palette = np.zeros((color_alette_size, 3), dtype=np.int32) + self.file.seek(0x36) + for i in range(color_alette_size): + b = unpack("B", self.file.read(1))[0] + g = unpack("B", self.file.read(1))[0] + r = unpack("B", self.file.read(1))[0] + alpha = unpack("B", self.file.read(1))[0] + color_palette[i][0] = b + color_palette[i][1] = g + color_palette[i][2] = r + return color_palette +``` + +### 1.1.4. 
BMP位图数据读取 + +接下来是位图数据。由于是8位色图,所以每个像素用1个字节表示,取出每个字节,从调色盘中获取对应的R/G/B/Alpha数值,忽略掉Alpha值,放入三维数组中,就是图片数据了。如果是24位色图,按照BGR的顺序排列,32位色图按照BGRAlpha排列。 + +读取颜色值的代码如下: + +```python + def get_RGB(self, pixel_data:str): + if len(pixel_data) <= 8: + color_index = int(pixel_data, 2) + return self.color_palette[color_index] + elif len(pixel_data) == 16: + b = int(pixel_data[1:6], 2) * 8 + g = int(pixel_data[6:11], 2) * 8 + r = int(pixel_data[11:16], 2) * 8 + return [r, g, b] + elif len(pixel_data) == 24: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + return [r, g, b] + elif len(pixel_data) == 32: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + alpha = int(pixel_data[24:32], 2) + return [r, g, b] +``` + +Lenna图片的biHeight为正数,说明图像倒立,从左下角开始到右上角,以行为主序排列。 + +位图数据排列还有一个规则,就是对齐。 + +Windows默认的扫描的最小单位是4字节,如果数据对齐满足这个值的话对于数据的获取速度等都是有很大的增益的。因此,BMP图像顺应了这个要求,要求每行的数据的长度必须是4的倍数,如果不够需要以0填充,这样可以达到按行的快速存取。 + +每行的长度(字节数)为: +$$ +Rowsize = 4 \times \left \lceil \frac{biBitCount \times biWidth}{32} \right \rceil +$$ +用代码实现为: + +```python +Rowsize = ((biWidth * biBitCount + 31) >> 5) << 2 +``` + +每行补零的字节数就为: +$$ +Padding = 4 \times \left \lceil \frac{biBitCount \times biWidth}{32} \right \rceil - \left \lceil \frac{biBitCount \times biWidth}{8} \right \rceil +$$ +获取图片三维数组的代码如下: + +```python + def get_numpy_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + img_np = np.zeros((biHeight, self.biWidth, 3), dtype=np.int32) + self.file.seek(self.bfOffBits) + for x in range(biHeight): + row_byte_count = ((self.biWidth * self.biBitCount + 31) >> 5) << 2 + row_bits = self.file.read(row_byte_count) + row_bits = ''.join(format(byte, '08b') for byte in row_bits) + for y in range(self.biWidth): + pixel_data = row_bits[y * self.biBitCount: (y + 1) * self.biBitCount] + if self.biHeight > 0: # 图像倒立 + img_np[biHeight - 1 - x][y] = self.get_RGB(pixel_data) + else: + img_np[x][y] = self.get_RGB(pixel_data) + return img_np +``` + +### 1.1.5. 
BMP图片的写入 + +将图片三维数组按照BMP格式写入二进制文件即可。这里我以8位色图写入。 + +```python + def save_img(self, image:np.ndarray, save_path:str): + with open(save_path, "wb") as file: + file.write(int(self.bfType).to_bytes(2, byteorder='little')) # 0x00 文件类型 + file.write(int(0x36 + 0x100 * 4 + self.biWidth * abs(self.biHeight)).to_bytes(4, byteorder='little')) # 0x02 文件大小 + file.write(int(0).to_bytes(4, byteorder='little')) # 0x06 保留,必须设置为0 + file.write(int(0x36 + 0x100 * 4).to_bytes(4, byteorder='little')) # 0x0a 从头到位图数据的偏移 + file.write(int(40).to_bytes(4, byteorder='little')) # 0x0e 信息头的大小 + file.write(int(self.biWidth).to_bytes(4, byteorder='little')) # 0x12 图像的宽度 + file.write(int(self.biHeight).to_bytes(4, byteorder='little')) # 0x16 图像的高度 + file.write(int(self.biPlanes).to_bytes(2, byteorder='little')) # 0x1a 颜色平面数 + file.write(int(8).to_bytes(2, byteorder='little')) # 0x1c 比特数/像素数 + file.write(int(self.biCompression).to_bytes(4, byteorder='little')) # 0x1e 压缩类型 + file.write(int(self.biSizeImage).to_bytes(4, byteorder='little')) # 0x22 位图数据的大小 + file.write(int(self.biXPelsPerMeter).to_bytes(4, byteorder='little')) # 0x26 水平分辨率 + file.write(int(self.biYPelsPerMeter).to_bytes(4, byteorder='little')) # 0x2a 垂直分辨率 + file.write(int(0x100 * 4).to_bytes(4, byteorder='little')) # 0x2e 位图使用的调色板中的颜色索引数 + file.write(int(0).to_bytes(4, byteorder='little')) # 0x32 对图像显示有重要影响的颜色索引数 + + for i in range(256): + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(0).to_bytes(1, byteorder='little')) + + for x in range(abs(self.biHeight)): + for y in range(self.biWidth): + if self.biHeight > 0: + file.write(int(image[self.biHeight - 1 - x][y]).to_bytes(1, byteorder='little')) + else: + file.write(int(image[x][y]).to_bytes(1, byteorder='little')) + file.write(b'0' * ((((self.biWidth * 8 + 31) >> 5) << 2) - 8 * self.biWidth)) + + file.close() +``` + +## 1.2. 直方图均衡化处理 + +直方图均衡化的步骤如下: + +1. 
将彩色图转换为灰度图; +2. 统计每个色阶的像素数,转换为频率; +3. 将各个色阶的频率依次累加,得到前缀和; +4. 将各个色阶的频率前缀和转换到相近的灰度色阶值,作为该色阶内像素的均衡化后的灰度值; +5. 将原图的各个像素变换到对应的灰度值。 + +### 1.2.1. 灰度化 + +这里灰度化的方法采用 +$$ +grey\space value=0.299\times R + 0.587 \times G + 0.114\times B +$$ +灰度转化代码如下: + +```python + def get_gray_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + gray_img = np.dot(self.img_np.reshape((biHeight * self.biWidth, 3)).astype(np.float32), + [0.299, 0.587, 0.114]).astype(np.int32) + gray_img = gray_img.reshape((biHeight, self.biWidth)) + return gray_img +``` + +### 1.2.2. 直方图均衡化 + +按照步骤,均衡化代码如下: + +```python + def equalize(self, level:int): + biHeight = abs(self.biHeight) + self.hist = np.zeros(256, dtype=np.int32) + max_value = self.gray.max() + min_value = self.gray.min() + gap = (max_value - min_value + 1) / level + for x in range(biHeight): + for y in range(self.biWidth): + self.hist[self.gray[x, y]] += 1 + hist = np.zeros(level, dtype=np.float32) + for i in range(level): + hist[i] = np.sum(self.hist[min_value + int(i * gap) : min_value + int((i + 1) * gap)]) + hist /= biHeight * self.biWidth + for i in range(1, level): + hist[i] += hist[i - 1] + hist *= level + hist = np.around(hist) + hist /= level + hist = np.floor(hist * 255).astype(np.int32) + self.equalized_img = np.zeros_like(self.gray) + self.equalized_hist = np.zeros(256, dtype=np.int32) + for x in range(biHeight): + for y in range(self.biWidth): + self.equalized_img[x, y] = hist[int((self.gray[x, y] - min_value) / gap)] + self.equalized_hist[self.equalized_img[x, y]] += 1 + return self.equalized_img, self.hist, self.equalized_hist +``` + +## 1.3. 
GUI界面设计和程序逻辑 + +```python +def choosepic(): + global path_ + path_ = tkinter.filedialog.askopenfilename(title='请选择图片文件', filetypes=[('图片', '.bmp')]) + if path_ == '': + return + img_temp = Image.open(path_).resize((int(256 * 0.8), int(256 * 0.8))) # 图片读取和加载 + img = ImageTk.PhotoImage(img_temp) + label_image1.config(image=img) + label_image1.image = img + + +def equalize(): + if path_ == '': + return + image = BmpData(path_) + # img = Image.fromarray(image.img_np.astype(np.uint8)) + # img.show() + + equalized_img, hist, equalized_hist = image.equalize(8) # 分别为均衡化的图/直方图/均衡化后的直方图 + equalized_img = Image.fromarray(equalized_img.astype(np.uint8)) + # equalized_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_equalized" + new_file_name = '.'.join(name_parts) + image.save_equalized_img(new_file_name) + + equalized_img = equalized_img.resize((int(256 * 0.8), int(256 * 0.8))) + equalized_img = ImageTk.PhotoImage(equalized_img) + label_image2.config(image=equalized_img) + label_image2.image = equalized_img # 处理后的图片的显示 + + +if __name__ == "__main__": + root = tkinter.Tk() + root.title('21281280柯劲帆') # 标题 + width, height = 600, 400 + width_max, height_max = root.maxsize() + s_center = '%dx%d+%d+%d' % (width, height, (width_max - width) / 2, (height_max - height) / 2) # 将页面显示在正中间 + root.geometry(s_center) + root.resizable(width=False, height=False) # 窗口不可移动 + l = tkinter.Label(root, text='实验二', width=60, height=2, fg='black', font=("微软雅黑", 16), anchor=tkinter.CENTER) + l.pack() + + label_image1 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image1.pack() + label_image1.place(x=45, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + label_image2 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image2.place(x=350, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + # 文本按钮 + Image_Input = tkinter.Button(root, text='Choose', 
command=choosepic) + Image_Input.place(x=105, y=300, width=80, height=30) + + # 直方图均衡化 + Fun1 = tkinter.Button(root, text='直方图均衡化', command=equalize) + Fun1.place(x=265, y=300, width=80, height=30) + + # 退出 + Quit = tkinter.Button(root, text='Quit', command=sys.exit) + Quit.place(x=415, y=300, width=80, height=30) + + end = tkinter.Label(root, text='21281280 柯劲帆', fg='silver', font=("微软雅黑", 10)) + end.place(x=215, y=360, width=200, height=20) + root.mainloop() +``` + + + +# 2. 实验过程 + +编好代码后,对Python代码进行封装,变成exe可执行程序。 + +在命令行中配置环境并封装: + +```sh +> pip install pyinstaller +> Pyinstaller -F -w read_bmp.py +``` + +在文件资源管理器窗口中双击exe文件,即可运行。 + + + +# 3. 实验结果及分析 + +这里我准备了手机拍摄的3张图片,图片内容相同,但是在拍摄的过程中调整亮度,得到3张(偏暗、正常、偏亮)的图片,直方图均衡化处理如下: + + + + + + + + + + + + + + + + + + + + + + + + + + +
偏暗 | 正常 | 偏亮
原图 | my_image_dark | my_image_normal | my_image_light
直方图均衡化过程 | dark | normal | light
直方图均衡化结果 | my_image_dark_equalized | my_image_normal_equalized | my_image_light_equalized
+ +可见偏暗的图进行直方图均衡化处理后,辨识度增加; + +正常的图进行直方图均衡化处理后,由于将颜色集中到几个色阶上,所以层次感增强; + +偏亮的图进行直方图均衡化处理后,效果不好,辨识度甚至下降了。 + + + +# 4. 心得体会 + +在本次直方图均衡化处理的数字图像处理实验中,我学习和掌握了以下几点: + +1. 熟练掌握了BMP格式图片的读取和写入,包括文件头、信息头、调色板以及位图数据的解析。这让我对图像文件的格式和结构有了更深入的理解。 +2. 实现了直方图均衡化处理的关键步骤,包括灰度化、计算直方图、直方图均衡化变换等。这让我对直方图均衡化算法的原理有了更清晰的认识。 +3. 通过编程实现直方图均衡化处理,并通过对不同曝光的图片进行处理,观察结果发现:偏暗图片效果好,正常图片层次增强,偏亮图片效果不佳。这让我理解到直方图均衡化处理的适用场景。 +4. 熟练使用Python中的Numpy、PIL等库进行图像处理,并编写GUI界面。这进一步提高了我的编程能力。 +5. 通过把Python代码打包成exe文件,实现可直接运行。这让我掌握了把代码封装成软件产品的方法。 + +通过本次实验,我对数字图像处理理论知识和编程实现能力都得到了提高。 + + + +# 5. 源代码 + +```python +import numpy as np +from struct import unpack +from PIL import Image, ImageTk +import sys +import tkinter +import tkinter.filedialog + +class BmpData: + def __init__(self, file_path:str): + with open(file_path, "rb") as file: + self.file = file + + self.bfType = unpack(" np.ndarray: + if (self.bfOffBits == 0x36): # 16/24位图像不需要调色板,起始位置就等于0x36 + return None + color_alette_size = 2 ** int(self.biBitCount) # 多少字节调色板颜色就有2^n个 + color_palette = np.zeros((color_alette_size, 3), dtype=np.int32) + self.file.seek(0x36) + for i in range(color_alette_size): + b = unpack("B", self.file.read(1))[0] + g = unpack("B", self.file.read(1))[0] + r = unpack("B", self.file.read(1))[0] + alpha = unpack("B", self.file.read(1))[0] + color_palette[i][0] = b + color_palette[i][1] = g + color_palette[i][2] = r + return color_palette + + def get_numpy_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + img_np = np.zeros((biHeight, self.biWidth, 3), dtype=np.int32) + self.file.seek(self.bfOffBits) + for x in range(biHeight): + row_byte_count = ((self.biWidth * self.biBitCount + 31) >> 5) << 2 + row_bits = self.file.read(row_byte_count) + row_bits = ''.join(format(byte, '08b') for byte in row_bits) + for y in range(self.biWidth): + pixel_data = row_bits[y * self.biBitCount: (y + 1) * self.biBitCount] + if self.biHeight > 0: # 图像倒立 + img_np[biHeight - 1 - x][y] = self.get_RGB(pixel_data) + else: + img_np[x][y] = 
self.get_RGB(pixel_data) + return img_np + + def get_gray_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + gray_img = np.dot(self.img_np.reshape((biHeight * self.biWidth, 3)).astype(np.float32), + [0.299, 0.587, 0.114]).astype(np.int32) + gray_img = gray_img.reshape((biHeight, self.biWidth)) + return gray_img + + def get_RGB(self, pixel_data:str): + if len(pixel_data) <= 8: + color_index = int(pixel_data, 2) + return self.color_palette[color_index] + elif len(pixel_data) == 16: + b = int(pixel_data[1:6], 2) * 8 + g = int(pixel_data[6:11], 2) * 8 + r = int(pixel_data[11:16], 2) * 8 + return [r, g, b] + elif len(pixel_data) == 24: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + return [r, g, b] + elif len(pixel_data) == 32: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + alpha = int(pixel_data[24:32], 2) + return [r, g, b] + + + def equalize(self, level:int): + biHeight = abs(self.biHeight) + self.hist = np.zeros(256, dtype=np.int32) + max_value = self.gray.max() + min_value = self.gray.min() + gap = (max_value - min_value + 1) / level + for x in range(biHeight): + for y in range(self.biWidth): + self.hist[self.gray[x, y]] += 1 + hist = np.zeros(level, dtype=np.float32) + for i in range(level): + hist[i] = np.sum(self.hist[min_value + int(i * gap) : min_value + int((i + 1) * gap)]) + hist /= biHeight * self.biWidth + for i in range(1, level): + hist[i] += hist[i - 1] + hist *= level + hist = np.around(hist) + hist /= level + hist = np.floor(hist * 255).astype(np.int32) + self.equalized_img = np.zeros_like(self.gray) + self.equalized_hist = np.zeros(256, dtype=np.int32) + for x in range(biHeight): + for y in range(self.biWidth): + self.equalized_img[x, y] = hist[int((self.gray[x, y] - min_value) / gap)] + self.equalized_hist[self.equalized_img[x, y]] += 1 + return self.equalized_img, self.hist, self.equalized_hist + + def save_equalized_img(self, save_path:str): + 
self.save_img(image=self.equalized_img, save_path=save_path) + + def save_img(self, image:np.ndarray, save_path:str): + with open(save_path, "wb") as file: + file.write(int(self.bfType).to_bytes(2, byteorder='little')) # 0x00 文件类型 + file.write(int(0x36 + 0x100 * 4 + self.biWidth * abs(self.biHeight)).to_bytes(4, byteorder='little')) # 0x02 文件大小 + file.write(int(0).to_bytes(4, byteorder='little')) # 0x06 保留,必须设置为0 + file.write(int(0x36 + 0x100 * 4).to_bytes(4, byteorder='little')) # 0x0a 从头到位图数据的偏移 + file.write(int(40).to_bytes(4, byteorder='little')) # 0x0e 信息头的大小 + file.write(int(self.biWidth).to_bytes(4, byteorder='little')) # 0x12 图像的宽度 + file.write(int(self.biHeight).to_bytes(4, byteorder='little')) # 0x16 图像的高度 + file.write(int(self.biPlanes).to_bytes(2, byteorder='little')) # 0x1a 颜色平面数 + file.write(int(8).to_bytes(2, byteorder='little')) # 0x1c 比特数/像素数 + file.write(int(self.biCompression).to_bytes(4, byteorder='little')) # 0x1e 压缩类型 + file.write(int(self.biSizeImage).to_bytes(4, byteorder='little')) # 0x22 位图数据的大小 + file.write(int(self.biXPelsPerMeter).to_bytes(4, byteorder='little')) # 0x26 水平分辨率 + file.write(int(self.biYPelsPerMeter).to_bytes(4, byteorder='little')) # 0x2a 垂直分辨率 + file.write(int(0x100 * 4).to_bytes(4, byteorder='little')) # 0x2e 位图使用的调色板中的颜色索引数 + file.write(int(0).to_bytes(4, byteorder='little')) # 0x32 对图像显示有重要影响的颜色索引数 + + for i in range(256): + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(0).to_bytes(1, byteorder='little')) + + for x in range(abs(self.biHeight)): + for y in range(self.biWidth): + if self.biHeight > 0: + file.write(int(image[self.biHeight - 1 - x][y]).to_bytes(1, byteorder='little')) + else: + file.write(int(image[x][y]).to_bytes(1, byteorder='little')) + file.write(b'0' * ((((self.biWidth * 8 + 31) >> 5) << 2) - 8 * self.biWidth)) + + file.close() + + +def choosepic(): + global path_ + path_ = 
tkinter.filedialog.askopenfilename(title='请选择图片文件', filetypes=[('图片', '.jpg .png .bmp .jpeg')]) + if path_ == '': + return + img_temp = Image.open(path_).resize((int(256 * 0.8), int(256 * 0.8))) # 图片读取和加载 + img = ImageTk.PhotoImage(img_temp) + label_image1.config(image=img) + label_image1.image = img + + +def equalize(): + if path_ == '': + return + image = BmpData(path_) + # img = Image.fromarray(image.img_np.astype(np.uint8)) + # img.show() + + equalized_img, hist, equalized_hist = image.equalize(8) # 分别为均衡化的图/直方图/均衡化后的直方图 + equalized_img = Image.fromarray(equalized_img.astype(np.uint8)) + # equalized_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_equalized" + new_file_name = '.'.join(name_parts) + image.save_equalized_img(new_file_name) + + equalized_img = equalized_img.resize((int(256 * 0.8), int(256 * 0.8))) + equalized_img = ImageTk.PhotoImage(equalized_img) + label_image2.config(image=equalized_img) + label_image2.image = equalized_img # 处理后的图片的显示 + + +if __name__ == "__main__": + root = tkinter.Tk() + root.title('21281280柯劲帆') # 标题 + width, height = 600, 400 + width_max, height_max = root.maxsize() + s_center = '%dx%d+%d+%d' % (width, height, (width_max - width) / 2, (height_max - height) / 2) # 将页面显示在正中间 + root.geometry(s_center) + root.resizable(width=False, height=False) # 窗口不可移动 + l = tkinter.Label(root, text='实验二', width=60, height=2, fg='black', font=("微软雅黑", 16), anchor=tkinter.CENTER) + l.pack() + + label_image1 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image1.pack() + label_image1.place(x=45, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + label_image2 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image2.place(x=350, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + # 文本按钮 + Image_Input = tkinter.Button(root, text='Choose', command=choosepic) + Image_Input.place(x=105, y=300, width=80, 
height=30) + + # 直方图均衡化 + Fun1 = tkinter.Button(root, text='直方图均衡化', command=equalize) + Fun1.place(x=265, y=300, width=80, height=30) + + # 退出 + Quit = tkinter.Button(root, text='Quit', command=sys.exit) + Quit.place(x=415, y=300, width=80, height=30) + + end = tkinter.Label(root, text='21281280 柯劲帆', fg='silver', font=("微软雅黑", 10)) + end.place(x=215, y=360, width=200, height=20) + root.mainloop() +``` + diff --git a/Lab/Lab2/source/实验2-柯劲帆-21281280.pdf b/Lab/Lab2/source/实验2-柯劲帆-21281280.pdf new file mode 100644 index 0000000..1f9662d Binary files /dev/null and b/Lab/Lab2/source/实验2-柯劲帆-21281280.pdf differ diff --git a/Lab/Lab2/test_images/Miss.bmp b/Lab/Lab2/test_images/Miss.bmp new file mode 100644 index 0000000..68dc9b2 Binary files /dev/null and b/Lab/Lab2/test_images/Miss.bmp differ diff --git a/Lab/Lab2/test_images/my_image_dark.bmp b/Lab/Lab2/test_images/my_image_dark.bmp new file mode 100644 index 0000000..4024fb0 Binary files /dev/null and b/Lab/Lab2/test_images/my_image_dark.bmp differ diff --git a/Lab/Lab2/test_images/my_image_dark_equalized.bmp b/Lab/Lab2/test_images/my_image_dark_equalized.bmp new file mode 100644 index 0000000..12e8f5f Binary files /dev/null and b/Lab/Lab2/test_images/my_image_dark_equalized.bmp differ diff --git a/Lab/Lab2/test_images/my_image_light.bmp b/Lab/Lab2/test_images/my_image_light.bmp new file mode 100644 index 0000000..f6509ca Binary files /dev/null and b/Lab/Lab2/test_images/my_image_light.bmp differ diff --git a/Lab/Lab2/test_images/my_image_light_equalized.bmp b/Lab/Lab2/test_images/my_image_light_equalized.bmp new file mode 100644 index 0000000..9c4beec Binary files /dev/null and b/Lab/Lab2/test_images/my_image_light_equalized.bmp differ diff --git a/Lab/Lab2/test_images/my_image_normal.bmp b/Lab/Lab2/test_images/my_image_normal.bmp new file mode 100644 index 0000000..04a62fa Binary files /dev/null and b/Lab/Lab2/test_images/my_image_normal.bmp differ diff --git a/Lab/Lab2/test_images/my_image_normal_equalized.bmp 
b/Lab/Lab2/test_images/my_image_normal_equalized.bmp new file mode 100644 index 0000000..d3af799 Binary files /dev/null and b/Lab/Lab2/test_images/my_image_normal_equalized.bmp differ diff --git a/Lab/Lab2/实验2-柯劲帆-21281280.pdf b/Lab/Lab2/实验2-柯劲帆-21281280.pdf new file mode 100644 index 0000000..1eb2dce Binary files /dev/null and b/Lab/Lab2/实验2-柯劲帆-21281280.pdf differ diff --git a/Lab/Lab3/code/add_noise.py b/Lab/Lab3/code/add_noise.py new file mode 100644 index 0000000..a96bf2b --- /dev/null +++ b/Lab/Lab3/code/add_noise.py @@ -0,0 +1,15 @@ +import skimage +from PIL import Image +import numpy as np + + +if __name__ == '__main__': + origin = skimage.img_as_float(Image.open('./test_images/my_image_normal.bmp')) + noisy = skimage.util.random_noise(origin, mode='gaussian', var=0.01) + noisy = (noisy * 255).astype(np.uint8) + skimage.io.imsave('./test_images/my_image_gaussiannoise.bmp', noisy) + + origin = skimage.img_as_float(Image.open('./test_images/my_image_normal.bmp')) + noisy = skimage.util.random_noise(origin, mode='salt') + noisy = (noisy * 255).astype(np.uint8) + skimage.io.imsave('./test_images/my_image_saltnoise.bmp', noisy) diff --git a/Lab/Lab3/code/read_bmp.py b/Lab/Lab3/code/read_bmp.py new file mode 100644 index 0000000..d8e467a --- /dev/null +++ b/Lab/Lab3/code/read_bmp.py @@ -0,0 +1,319 @@ +import numpy as np +from struct import unpack +from PIL import Image, ImageTk +import sys +import tkinter +import tkinter.filedialog + +class BmpData: + def __init__(self, file_path:str): + with open(file_path, "rb") as file: + self.file = file + + self.bfType = unpack(" np.ndarray: + if (self.bfOffBits == 0x36): # 16/24位图像不需要调色板,起始位置就等于0x36 + return None + color_alette_size = 2 ** int(self.biBitCount) # 多少字节调色板颜色就有2^n个 + color_palette = np.zeros((color_alette_size, 3), dtype=np.int32) + self.file.seek(0x36) + for i in range(color_alette_size): + b = unpack("B", self.file.read(1))[0] + g = unpack("B", self.file.read(1))[0] + r = unpack("B", self.file.read(1))[0] + 
alpha = unpack("B", self.file.read(1))[0] + color_palette[i][0] = b + color_palette[i][1] = g + color_palette[i][2] = r + return color_palette + + def get_numpy_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + img_np = np.zeros((biHeight, self.biWidth, 3), dtype=np.int32) + self.file.seek(self.bfOffBits) + for x in range(biHeight): + row_byte_count = ((self.biWidth * self.biBitCount + 31) >> 5) << 2 + row_bits = self.file.read(row_byte_count) + row_bits = ''.join(format(byte, '08b') for byte in row_bits) + for y in range(self.biWidth): + pixel_data = row_bits[y * self.biBitCount: (y + 1) * self.biBitCount] + if self.biHeight > 0: # 图像倒立 + img_np[biHeight - 1 - x][y] = self.get_RGB(pixel_data) + else: + img_np[x][y] = self.get_RGB(pixel_data) + return img_np + + def get_gray_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + gray_img = np.dot(self.img_np.reshape((biHeight * self.biWidth, 3)).astype(np.float32), + [0.299, 0.587, 0.114]).astype(np.int32) + gray_img = gray_img.reshape((biHeight, self.biWidth)) + return gray_img + + def get_RGB(self, pixel_data:str): + if len(pixel_data) <= 8: + color_index = int(pixel_data, 2) + return self.color_palette[color_index] + elif len(pixel_data) == 16: + b = int(pixel_data[1:6], 2) * 8 + g = int(pixel_data[6:11], 2) * 8 + r = int(pixel_data[11:16], 2) * 8 + return [r, g, b] + elif len(pixel_data) == 24: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + return [r, g, b] + elif len(pixel_data) == 32: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + alpha = int(pixel_data[24:32], 2) + return [r, g, b] + + + def equalize(self, level:int): + biHeight = abs(self.biHeight) + self.hist = np.zeros(256, dtype=np.int32) + max_value = self.gray.max() + min_value = self.gray.min() + gap = (max_value - min_value + 1) / level + for x in range(biHeight): + for y in range(self.biWidth): + self.hist[self.gray[x, y]] += 1 + hist = 
np.zeros(level, dtype=np.float32) + for i in range(level): + hist[i] = np.sum(self.hist[min_value + int(i * gap) : min_value + int((i + 1) * gap)]) + hist /= biHeight * self.biWidth + for i in range(1, level): + hist[i] += hist[i - 1] + hist *= level + hist = np.around(hist) + hist /= level + hist = np.floor(hist * 255).astype(np.int32) + self.equalized_img = np.zeros_like(self.gray) + self.equalized_hist = np.zeros(256, dtype=np.int32) + for x in range(biHeight): + for y in range(self.biWidth): + self.equalized_img[x, y] = hist[int((self.gray[x, y] - min_value) / gap)] + self.equalized_hist[self.equalized_img[x, y]] += 1 + return self.equalized_img, self.hist, self.equalized_hist + + + def save_equalized_img(self, save_path:str): + self.save_img(image=self.equalized_img, save_path=save_path) + + + def medianfilter(self): + self.medianfiltered_img = np.zeros_like(self.gray) + padded_img = np.zeros((abs(self.biHeight) + 2, self.biWidth + 2), dtype=np.int16) + padded_img[1:-1, 1:-1] = self.gray + padded_img[0, 1:-1] = self.gray[0, :] + padded_img[-1, 1:-1] = self.gray[-1, :] + padded_img[1:-1, 0] = self.gray[:, 0] + padded_img[1:-1, -1] = self.gray[:, -1] + padded_img[0][0] = self.gray[0][0] + padded_img[0][-1] = self.gray[0][-1] + padded_img[-1][0] = self.gray[-1][0] + padded_img[-1][-1] = self.gray[-1][-1] + for x in range(abs(self.biHeight)): + for y in range(self.biWidth): + self.medianfiltered_img[x][y] = np.around(np.median(padded_img[x:x+3, y:y+3])) + return self.medianfiltered_img + + + def save_medianfiltered_img(self, save_path:str): + self.save_img(image=self.medianfiltered_img, save_path=save_path) + + + def meanfilter(self): + self.meanfiltered_img = np.zeros_like(self.gray) + padded_img = np.zeros((abs(self.biHeight) + 2, self.biWidth + 2), dtype=np.int16) + padded_img[1:-1, 1:-1] = self.gray + padded_img[0, 1:-1] = self.gray[0, :] + padded_img[-1, 1:-1] = self.gray[-1, :] + padded_img[1:-1, 0] = self.gray[:, 0] + padded_img[1:-1, -1] = self.gray[:, -1] 
+ padded_img[0][0] = self.gray[0][0] + padded_img[0][-1] = self.gray[0][-1] + padded_img[-1][0] = self.gray[-1][0] + padded_img[-1][-1] = self.gray[-1][-1] + for x in range(abs(self.biHeight)): + for y in range(self.biWidth): + self.meanfiltered_img[x][y] = np.around(np.mean(padded_img[x:x+3, y:y+3])) + return self.meanfiltered_img + + + def save_meanfiltered_img(self, save_path:str): + self.save_img(image=self.meanfiltered_img, save_path=save_path) + + + def save_img(self, image:np.ndarray, save_path:str): + with open(save_path, "wb") as file: + file.write(int(self.bfType).to_bytes(2, byteorder='little')) # 0x00 文件类型 + file.write(int(0x36 + 0x100 * 4 + self.biWidth * abs(self.biHeight)).to_bytes(4, byteorder='little')) # 0x02 文件大小 + file.write(int(0).to_bytes(4, byteorder='little')) # 0x06 保留,必须设置为0 + file.write(int(0x36 + 0x100 * 4).to_bytes(4, byteorder='little')) # 0x0a 从头到位图数据的偏移 + file.write(int(40).to_bytes(4, byteorder='little')) # 0x0e 信息头的大小 + file.write(int(self.biWidth).to_bytes(4, byteorder='little')) # 0x12 图像的宽度 + file.write(int(self.biHeight).to_bytes(4, byteorder='little')) # 0x16 图像的高度 + file.write(int(self.biPlanes).to_bytes(2, byteorder='little')) # 0x1a 颜色平面数 + file.write(int(8).to_bytes(2, byteorder='little')) # 0x1c 比特数/像素数 + file.write(int(self.biCompression).to_bytes(4, byteorder='little')) # 0x1e 压缩类型 + file.write(int(self.biSizeImage).to_bytes(4, byteorder='little')) # 0x22 位图数据的大小 + file.write(int(self.biXPelsPerMeter).to_bytes(4, byteorder='little')) # 0x26 水平分辨率 + file.write(int(self.biYPelsPerMeter).to_bytes(4, byteorder='little')) # 0x2a 垂直分辨率 + file.write(int(0x100 * 4).to_bytes(4, byteorder='little')) # 0x2e 位图使用的调色板中的颜色索引数 + file.write(int(0).to_bytes(4, byteorder='little')) # 0x32 对图像显示有重要影响的颜色索引数 + + for i in range(256): + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(i).to_bytes(1, byteorder='little')) + file.write(int(0).to_bytes(1, byteorder='little')) 
+ + for x in range(abs(self.biHeight)): + for y in range(self.biWidth): + if self.biHeight > 0: + file.write(int(image[self.biHeight - 1 - x][y]).to_bytes(1, byteorder='little')) + else: + file.write(int(image[x][y]).to_bytes(1, byteorder='little')) + file.write(b'0' * ((((self.biWidth * 8 + 31) >> 5) << 2) - 8 * self.biWidth)) + + file.close() + + +def choosepic(): + global path_ + path_ = tkinter.filedialog.askopenfilename(title='请选择图片文件', filetypes=[('图片', '.bmp')]) + if path_ == '': + return + img_temp = Image.open(path_).resize((int(256 * 0.8), int(256 * 0.8))) # 图片读取和加载 + img = ImageTk.PhotoImage(img_temp) + label_image1.config(image=img) + label_image1.image = img + + +def equalize(): + if path_ == '': + return + image = BmpData(path_) + # img = Image.fromarray(image.img_np.astype(np.uint8)) + # img.show() + + equalized_img, hist, equalized_hist = image.equalize(8) # 分别为均衡化的图/直方图/均衡化后的直方图 + equalized_img = Image.fromarray(equalized_img.astype(np.uint8)) + # equalized_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_equalized" + new_file_name = '.'.join(name_parts) + image.save_equalized_img(new_file_name) + + equalized_img = equalized_img.resize((int(256 * 0.8), int(256 * 0.8))) + equalized_img = ImageTk.PhotoImage(equalized_img) + label_image2.config(image=equalized_img) + label_image2.image = equalized_img # 处理后的图片的显示 + + +def medianfilter(): + if path_ == '': + return + image = BmpData(path_) + medianfiltered_img = image.medianfilter() + medianfiltered_img = Image.fromarray(medianfiltered_img.astype(np.uint8)) + # medianfiltered_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_medianfiltered" + new_file_name = '.'.join(name_parts) + image.save_medianfiltered_img(new_file_name) + + medianfiltered_img = medianfiltered_img.resize((int(256 * 0.8), int(256 * 0.8))) + medianfiltered_img = ImageTk.PhotoImage(medianfiltered_img) + label_image2.config(image=medianfiltered_img) + label_image2.image = medianfiltered_img # 
处理后的图片的显示 + + + +def meanfilter(): + if path_ == '': + return + image = BmpData(path_) + meanfiltered_img = image.meanfilter() + meanfiltered_img = Image.fromarray(meanfiltered_img.astype(np.uint8)) + # meanfiltered_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_meanfiltered" + new_file_name = '.'.join(name_parts) + image.save_meanfiltered_img(new_file_name) + + meanfiltered_img = meanfiltered_img.resize((int(256 * 0.8), int(256 * 0.8))) + meanfiltered_img = ImageTk.PhotoImage(meanfiltered_img) + label_image2.config(image=meanfiltered_img) + label_image2.image = meanfiltered_img # 处理后的图片的显示 + + +if __name__ == "__main__": + root = tkinter.Tk() + root.title('21281280柯劲帆') # 标题 + width, height = 600, 400 + width_max, height_max = root.maxsize() + s_center = '%dx%d+%d+%d' % (width, height, (width_max - width) / 2, (height_max - height) / 2) # 将页面显示在正中间 + root.geometry(s_center) + root.resizable(width=False, height=False) # 窗口不可移动 + l = tkinter.Label(root, text='实验三', width=60, height=2, fg='black', font=("微软雅黑", 16), anchor=tkinter.CENTER) + l.pack() + + label_image1 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image1.pack() + label_image1.place(x=45, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + label_image2 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image2.place(x=350, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + # 文本按钮 + Image_Input = tkinter.Button(root, text='Choose', command=choosepic) + Image_Input.place(x=50, y=300, width=90, height=30) + + # 直方图均衡化 + Fun1 = tkinter.Button(root, text='直方图均衡化', command=equalize) + Fun1.place(x=150, y=300, width=90, height=30) + + # 中值滤波 + Fun1 = tkinter.Button(root, text='中值滤波', command=medianfilter) # 添加对应按钮 + Fun1.place(x=250, y=300, width=90, height=30) + + # 均值滤波 + Fun2 = tkinter.Button(root, text='均值滤波', command=meanfilter) # 添加对应按钮 + Fun2.place(x=350, 
y=300, width=90, height=30) + + # 退出 + Quit = tkinter.Button(root, text='Quit', command=sys.exit) + Quit.place(x=450, y=300, width=90, height=30) + + end = tkinter.Label(root, text='21281280 柯劲帆', fg='silver', font=("微软雅黑", 10)) + end.place(x=215, y=360, width=200, height=20) + root.mainloop() \ No newline at end of file diff --git a/Lab/Lab3/read_bmp.exe b/Lab/Lab3/read_bmp.exe new file mode 100644 index 0000000..40649c9 Binary files /dev/null and b/Lab/Lab3/read_bmp.exe differ diff --git a/Lab/Lab3/source/gaussian_mean.png b/Lab/Lab3/source/gaussian_mean.png new file mode 100644 index 0000000..f2927f9 Binary files /dev/null and b/Lab/Lab3/source/gaussian_mean.png differ diff --git a/Lab/Lab3/source/gaussian_median.png b/Lab/Lab3/source/gaussian_median.png new file mode 100644 index 0000000..4cd7066 Binary files /dev/null and b/Lab/Lab3/source/gaussian_median.png differ diff --git a/Lab/Lab3/source/my_image_gaussiannoise.bmp b/Lab/Lab3/source/my_image_gaussiannoise.bmp new file mode 100644 index 0000000..67a7d91 Binary files /dev/null and b/Lab/Lab3/source/my_image_gaussiannoise.bmp differ diff --git a/Lab/Lab3/source/my_image_gaussiannoise_meanfiltered.bmp b/Lab/Lab3/source/my_image_gaussiannoise_meanfiltered.bmp new file mode 100644 index 0000000..44ee42e Binary files /dev/null and b/Lab/Lab3/source/my_image_gaussiannoise_meanfiltered.bmp differ diff --git a/Lab/Lab3/source/my_image_gaussiannoise_medianfiltered.bmp b/Lab/Lab3/source/my_image_gaussiannoise_medianfiltered.bmp new file mode 100644 index 0000000..db8adaf Binary files /dev/null and b/Lab/Lab3/source/my_image_gaussiannoise_medianfiltered.bmp differ diff --git a/Lab/Lab3/source/my_image_normal.bmp b/Lab/Lab3/source/my_image_normal.bmp new file mode 100644 index 0000000..04a62fa Binary files /dev/null and b/Lab/Lab3/source/my_image_normal.bmp differ diff --git a/Lab/Lab3/source/my_image_normal_meanfiltered.bmp b/Lab/Lab3/source/my_image_normal_meanfiltered.bmp new file mode 100644 index 0000000..2d75607 
Binary files /dev/null and b/Lab/Lab3/source/my_image_normal_meanfiltered.bmp differ diff --git a/Lab/Lab3/source/my_image_normal_medianfiltered.bmp b/Lab/Lab3/source/my_image_normal_medianfiltered.bmp new file mode 100644 index 0000000..1471c57 Binary files /dev/null and b/Lab/Lab3/source/my_image_normal_medianfiltered.bmp differ diff --git a/Lab/Lab3/source/my_image_saltnoise.bmp b/Lab/Lab3/source/my_image_saltnoise.bmp new file mode 100644 index 0000000..7a323b5 Binary files /dev/null and b/Lab/Lab3/source/my_image_saltnoise.bmp differ diff --git a/Lab/Lab3/source/my_image_saltnoise_meanfiltered.bmp b/Lab/Lab3/source/my_image_saltnoise_meanfiltered.bmp new file mode 100644 index 0000000..a42b690 Binary files /dev/null and b/Lab/Lab3/source/my_image_saltnoise_meanfiltered.bmp differ diff --git a/Lab/Lab3/source/my_image_saltnoise_medianfiltered.bmp b/Lab/Lab3/source/my_image_saltnoise_medianfiltered.bmp new file mode 100644 index 0000000..5b729a7 Binary files /dev/null and b/Lab/Lab3/source/my_image_saltnoise_medianfiltered.bmp differ diff --git a/Lab/Lab3/source/normal_mean.png b/Lab/Lab3/source/normal_mean.png new file mode 100644 index 0000000..5e1f55f Binary files /dev/null and b/Lab/Lab3/source/normal_mean.png differ diff --git a/Lab/Lab3/source/normal_median.png b/Lab/Lab3/source/normal_median.png new file mode 100644 index 0000000..1151db0 Binary files /dev/null and b/Lab/Lab3/source/normal_median.png differ diff --git a/Lab/Lab3/source/salt_mean.png b/Lab/Lab3/source/salt_mean.png new file mode 100644 index 0000000..3a96294 Binary files /dev/null and b/Lab/Lab3/source/salt_mean.png differ diff --git a/Lab/Lab3/source/salt_median.png b/Lab/Lab3/source/salt_median.png new file mode 100644 index 0000000..2adc55d Binary files /dev/null and b/Lab/Lab3/source/salt_median.png differ diff --git a/Lab/Lab3/source/实验3-柯劲帆-21281280.md b/Lab/Lab3/source/实验3-柯劲帆-21281280.md new file mode 100644 index 0000000..e5e9626 --- /dev/null +++ 
b/Lab/Lab3/source/实验3-柯劲帆-21281280.md @@ -0,0 +1,601 @@ +

实验报告

+ +
+
课程名称:数字图像处理
+
实验题目:均值滤波、中值滤波的计算机实现
+
学号:21281280
+
姓名:柯劲帆
+
班级:物联网2101班
+
指导老师:安高云
+
报告日期:2024年1月10日
+
+ + + +--- + + + +**目录** + +[TOC] + +--- + + + +# 1. 均值、中值滤波程序 + +本实验中我使用Python实现均值、中值滤波。对于图像的读取、处理和保存,我都使用了按字节进行读写的方式,符合实验要求。 + +BMP格式图片的读写已经写在实验二中,在此不再赘述。 + +## 1.1. 均值滤波 + +均值滤波就是将图片中某一像素点的值用该点和其周围九宫格内的八个点的均值来替代。这样可以有效滤去高斯噪声。 + +如果直接进行滤波,滤波结果尺寸将会缩减为$(originalHeight - 2) \times (originalWidth - 2)$。因此,需要先将原图像填充至$(originalHeight + 2) \times (originalWidth + 2)$,再进行滤波,滤波后的尺寸才能保持$originalHeight \times originalWidth$。 + +这里填充的内容选择重复原图片边缘的像素点。 + +代码实现如下: + +```python + def meanfilter(self): + self.meanfiltered_img = np.zeros_like(self.gray) + padded_img = np.zeros((abs(self.biHeight) + 2, self.biWidth + 2), dtype=np.int16) + padded_img[1:-1, 1:-1] = self.gray + padded_img[0, 1:-1] = self.gray[0, :] + padded_img[-1, 1:-1] = self.gray[-1, :] + padded_img[1:-1, 0] = self.gray[:, 0] + padded_img[1:-1, -1] = self.gray[:, -1] + padded_img[0][0] = self.gray[0][0] + padded_img[0][-1] = self.gray[0][-1] + padded_img[-1][0] = self.gray[-1][0] + padded_img[-1][-1] = self.gray[-1][-1] + for x in range(abs(self.biHeight)): + for y in range(self.biWidth): + self.meanfiltered_img[x][y] = np.around(np.mean(padded_img[x:x+3, y:y+3])) + return self.meanfiltered_img +``` + +## 1.2. 
中值滤波 + +中值滤波就是将图片中某一像素点的值用该点和其周围九宫格内的八个点的中值来替代。这样可以有效滤去椒盐噪声。 + +填充的方法与均值滤波相同。 + +代码实现如下: + +```python + def medianfilter(self): + self.medianfiltered_img = np.zeros_like(self.gray) + padded_img = np.zeros((abs(self.biHeight) + 2, self.biWidth + 2), dtype=np.int16) + padded_img[1:-1, 1:-1] = self.gray + padded_img[0, 1:-1] = self.gray[0, :] + padded_img[-1, 1:-1] = self.gray[-1, :] + padded_img[1:-1, 0] = self.gray[:, 0] + padded_img[1:-1, -1] = self.gray[:, -1] + padded_img[0][0] = self.gray[0][0] + padded_img[0][-1] = self.gray[0][-1] + padded_img[-1][0] = self.gray[-1][0] + padded_img[-1][-1] = self.gray[-1][-1] + for x in range(abs(self.biHeight)): + for y in range(self.biWidth): + self.medianfiltered_img[x][y] = np.around(np.median(padded_img[x:x+3, y:y+3])) + return self.medianfiltered_img +``` + +## 1.3. GUI界面设计和程序逻辑 + +由于相比实验二多了两个功能,UI界面和控件函数需要新增和修改。 + +新增和修改的代码如下: + +```python +def medianfilter(): + if path_ == '': + return + image = BmpData(path_) + medianfiltered_img = image.medianfilter() + medianfiltered_img = Image.fromarray(medianfiltered_img.astype(np.uint8)) + # medianfiltered_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_medianfiltered" + new_file_name = '.'.join(name_parts) + image.save_medianfiltered_img(new_file_name) + + medianfiltered_img = medianfiltered_img.resize((int(256 * 0.8), int(256 * 0.8))) + medianfiltered_img = ImageTk.PhotoImage(medianfiltered_img) + label_image2.config(image=medianfiltered_img) + label_image2.image = medianfiltered_img # 处理后的图片的显示 + + + +def meanfilter(): + if path_ == '': + return + image = BmpData(path_) + meanfiltered_img = image.meanfilter() + meanfiltered_img = Image.fromarray(meanfiltered_img.astype(np.uint8)) + # meanfiltered_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_meanfiltered" + new_file_name = '.'.join(name_parts) + image.save_meanfiltered_img(new_file_name) + + meanfiltered_img = meanfiltered_img.resize((int(256 * 0.8), int(256 * 0.8))) + meanfiltered_img 
= ImageTk.PhotoImage(meanfiltered_img) + label_image2.config(image=meanfiltered_img) + label_image2.image = meanfiltered_img # 处理后的图片的显示 + + +if __name__ == "__main__": + root = tkinter.Tk() + root.title('21281280柯劲帆') # 标题 + width, height = 600, 400 + width_max, height_max = root.maxsize() + s_center = '%dx%d+%d+%d' % (width, height, (width_max - width) / 2, (height_max - height) / 2) # 将页面显示在正中间 + root.geometry(s_center) + root.resizable(width=False, height=False) # 窗口不可移动 + l = tkinter.Label(root, text='实验三', width=60, height=2, fg='black', font=("微软雅黑", 16), anchor=tkinter.CENTER) + l.pack() + + label_image1 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image1.pack() + label_image1.place(x=45, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + label_image2 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image2.place(x=350, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + # 文本按钮 + Image_Input = tkinter.Button(root, text='Choose', command=choosepic) + Image_Input.place(x=50, y=300, width=90, height=30) + + # 直方图均衡化 + Fun1 = tkinter.Button(root, text='直方图均衡化', command=equalize) + Fun1.place(x=150, y=300, width=90, height=30) + + # 中值滤波 + Fun1 = tkinter.Button(root, text='中值滤波', command=medianfilter) # 添加对应按钮 + Fun1.place(x=250, y=300, width=90, height=30) + + # 均值滤波 + Fun2 = tkinter.Button(root, text='均值滤波', command=meanfilter) # 添加对应按钮 + Fun2.place(x=350, y=300, width=90, height=30) + + # 退出 + Quit = tkinter.Button(root, text='Quit', command=sys.exit) + Quit.place(x=450, y=300, width=90, height=30) + + end = tkinter.Label(root, text='21281280 柯劲帆', fg='silver', font=("微软雅黑", 10)) + end.place(x=215, y=360, width=200, height=20) + root.mainloop() +``` + + + +# 2. 
实验过程 + +编好代码后,对Python代码进行封装,变成exe可执行程序。 + +在命令行中配置环境并封装: + +```sh +> pip install pyinstaller +> Pyinstaller -F -w read_bmp.py +``` + +在文件资源管理器窗口中双击exe文件,即可运行。 + + + +# 3. 实验结果及分析 + +这里我准备了手机拍摄的1张图片,对其添加高斯噪声和椒盐噪声: + +```python +import skimage +from PIL import Image +import numpy as np + +if __name__ == '__main__': + origin = skimage.img_as_float(Image.open('./test_images/my_image_normal.bmp')) + noisy = skimage.util.random_noise(origin, mode='gaussian', var=0.01) + noisy = (noisy * 255).astype(np.uint8) + skimage.io.imsave('./test_images/my_image_gaussiannoise.bmp', noisy) + + origin = skimage.img_as_float(Image.open('./test_images/my_image_normal.bmp')) + noisy = skimage.util.random_noise(origin, mode='salt') + noisy = (noisy * 255).astype(np.uint8) + skimage.io.imsave('./test_images/my_image_saltnoise.bmp', noisy) +``` + +进行中值滤波、均值滤波过程和结果如下: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| | 正常 | 加高斯噪声 | 加椒盐噪声 |
| --- | --- | --- | --- |
| 原图 | my_image_normal | my_image_gaussiannoise | my_image_saltnoise |
| 中值滤波结果 | my_image_normal_medianfiltered | my_image_gaussiannoise_medianfiltered | my_image_saltnoise_medianfiltered |
| 均值滤波结果 | my_image_normal_meanfiltered | my_image_gaussiannoise_meanfiltered | my_image_saltnoise_meanfiltered |
| 中值滤波过程 | normal_median | gaussian_median | salt_median |
| 均值滤波过程 | normal_mean | gaussian_mean | salt_mean |
+ + +可见正常的图进行中值滤波后,细节有所丢失,蓝色斑点几乎消失;进行均值滤波后,蓝色斑点有所保留; + +加了高斯噪声的图进行中值滤波和均值滤波的效果都并不好,还是很模糊; + +加了椒盐噪声的图进行中值滤波后,噪声能明显去除;进行均值滤波,噪声去除效果不佳。 + + + +# 4. 心得体会 + +通过本次实验,我掌握了图像的均值滤波和中值滤波的原理及实现方法。 + +均值滤波可以有效滤除图像中的高斯噪声,它通过用像素点周围区域的平均值来替代该像素点的值,由于高斯噪声具有零均值的特点,所以能够有效地减弱噪声;中值滤波可以有效去除图像中的椒盐噪声,它通过用像素点周围区域的中值来替代该像素点的值,由于椒盐噪声往往处于区域灰度值的两端,使用中值可以将其排除在外。 + +通过测试不同的图像,我观察到均值滤波对正常图像会造成一定的模糊,而中值滤波可以很好地保持边缘细节。所以在不需要强烈平滑图像的情况下,中值滤波更好。当图像包含不同类型的噪声时,需要权衡使用均值滤波还是中值滤波。 + +通过本次实验,我对数字图像处理滤波方法有了更深入的理解。 + + + +# 5. 源代码 + +```python +import numpy as np +from struct import unpack +from PIL import Image, ImageTk +import sys +import tkinter +import tkinter.filedialog + +class BmpData: + def __init__(self, file_path:str): + with open(file_path, "rb") as file: + self.file = file + + self.bfType = unpack(" np.ndarray: + if (self.bfOffBits == 0x36): # 16/24位图像不需要调色板,起始位置就等于0x36 + return None + color_alette_size = 2 ** int(self.biBitCount) # 多少字节调色板颜色就有2^n个 + color_palette = np.zeros((color_alette_size, 3), dtype=np.int32) + self.file.seek(0x36) + for i in range(color_alette_size): + b = unpack("B", self.file.read(1))[0] + g = unpack("B", self.file.read(1))[0] + r = unpack("B", self.file.read(1))[0] + alpha = unpack("B", self.file.read(1))[0] + color_palette[i][0] = b + color_palette[i][1] = g + color_palette[i][2] = r + return color_palette + + def get_numpy_img(self) -> np.ndarray: + biHeight = abs(self.biHeight) + img_np = np.zeros((biHeight, self.biWidth, 3), dtype=np.int32) + self.file.seek(self.bfOffBits) + for x in range(biHeight): + row_byte_count = ((self.biWidth * self.biBitCount + 31) >> 5) << 2 + row_bits = self.file.read(row_byte_count) + row_bits = ''.join(format(byte, '08b') for byte in row_bits) + for y in range(self.biWidth): + pixel_data = row_bits[y * self.biBitCount: (y + 1) * self.biBitCount] + if self.biHeight > 0: # 图像倒立 + img_np[biHeight - 1 - x][y] = self.get_RGB(pixel_data) + else: + img_np[x][y] = self.get_RGB(pixel_data) + return img_np + + def get_gray_img(self) -> np.ndarray: 
+ biHeight = abs(self.biHeight) + gray_img = np.dot(self.img_np.reshape((biHeight * self.biWidth, 3)).astype(np.float32), + [0.299, 0.587, 0.114]).astype(np.int32) + gray_img = gray_img.reshape((biHeight, self.biWidth)) + return gray_img + + def get_RGB(self, pixel_data:str): + if len(pixel_data) <= 8: + color_index = int(pixel_data, 2) + return self.color_palette[color_index] + elif len(pixel_data) == 16: + b = int(pixel_data[1:6], 2) * 8 + g = int(pixel_data[6:11], 2) * 8 + r = int(pixel_data[11:16], 2) * 8 + return [r, g, b] + elif len(pixel_data) == 24: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + return [r, g, b] + elif len(pixel_data) == 32: + b = int(pixel_data[0:8], 2) + g = int(pixel_data[8:16], 2) + r = int(pixel_data[16:24], 2) + alpha = int(pixel_data[24:32], 2) + return [r, g, b] + + + def equalize(self, level:int): + biHeight = abs(self.biHeight) + self.hist = np.zeros(256, dtype=np.int32) + max_value = self.gray.max() + min_value = self.gray.min() + gap = (max_value - min_value + 1) / level + for x in range(biHeight): + for y in range(self.biWidth): + self.hist[self.gray[x, y]] += 1 + hist = np.zeros(level, dtype=np.float32) + for i in range(level): + hist[i] = np.sum(self.hist[min_value + int(i * gap) : min_value + int((i + 1) * gap)]) + hist /= biHeight * self.biWidth + for i in range(1, level): + hist[i] += hist[i - 1] + hist *= level + hist = np.around(hist) + hist /= level + hist = np.floor(hist * 255).astype(np.int32) + self.equalized_img = np.zeros_like(self.gray) + self.equalized_hist = np.zeros(256, dtype=np.int32) + for x in range(biHeight): + for y in range(self.biWidth): + self.equalized_img[x, y] = hist[int((self.gray[x, y] - min_value) / gap)] + self.equalized_hist[self.equalized_img[x, y]] += 1 + return self.equalized_img, self.hist, self.equalized_hist + + + def save_equalized_img(self, save_path:str): + self.save_img(image=self.equalized_img, save_path=save_path) + + + def 
medianfilter(self):
+        self.medianfiltered_img = np.zeros_like(self.gray)
+        padded_img = np.zeros((abs(self.biHeight) + 2, self.biWidth + 2), dtype=np.int16)  # gray image plus a 1-pixel border
+        padded_img[1:-1, 1:-1] = self.gray
+        padded_img[0, 1:-1] = self.gray[0, :]  # border rows/columns replicate the nearest edge pixels
+        padded_img[-1, 1:-1] = self.gray[-1, :]
+        padded_img[1:-1, 0] = self.gray[:, 0]
+        padded_img[1:-1, -1] = self.gray[:, -1]
+        padded_img[0][0] = self.gray[0][0]
+        padded_img[0][-1] = self.gray[0][-1]
+        padded_img[-1][0] = self.gray[-1][0]
+        padded_img[-1][-1] = self.gray[-1][-1]
+        for x in range(abs(self.biHeight)):
+            for y in range(self.biWidth):
+                self.medianfiltered_img[x][y] = np.around(np.median(padded_img[x:x+3, y:y+3]))  # median of the 3x3 window
+        return self.medianfiltered_img
+
+
+    def save_medianfiltered_img(self, save_path:str):
+        self.save_img(image=self.medianfiltered_img, save_path=save_path)
+
+
+    def meanfilter(self):
+        self.meanfiltered_img = np.zeros_like(self.gray)
+        padded_img = np.zeros((abs(self.biHeight) + 2, self.biWidth + 2), dtype=np.int16)  # gray image plus a 1-pixel border
+        padded_img[1:-1, 1:-1] = self.gray
+        padded_img[0, 1:-1] = self.gray[0, :]  # border rows/columns replicate the nearest edge pixels
+        padded_img[-1, 1:-1] = self.gray[-1, :]
+        padded_img[1:-1, 0] = self.gray[:, 0]
+        padded_img[1:-1, -1] = self.gray[:, -1]
+        padded_img[0][0] = self.gray[0][0]
+        padded_img[0][-1] = self.gray[0][-1]
+        padded_img[-1][0] = self.gray[-1][0]
+        padded_img[-1][-1] = self.gray[-1][-1]
+        for x in range(abs(self.biHeight)):
+            for y in range(self.biWidth):
+                self.meanfiltered_img[x][y] = np.around(np.mean(padded_img[x:x+3, y:y+3]))  # mean of the 3x3 window
+        return self.meanfiltered_img
+
+
+    def save_meanfiltered_img(self, save_path:str):
+        self.save_img(image=self.meanfiltered_img, save_path=save_path)
+
+
+    def save_img(self, image:np.ndarray, save_path:str):
+        """Write `image` to save_path as an 8-bit BMP with a 256-entry gray palette; rows are padded to 4 bytes."""
+        row_bytes = ((self.biWidth * 8 + 31) >> 5) << 2  # stored length of one pixel row, padding included
+        with open(save_path, "wb") as file:
+            file.write(int(self.bfType).to_bytes(2, byteorder='little'))  # 0x00 file type
+            file.write(int(0x36 + 0x100 * 4 + row_bytes * abs(self.biHeight)).to_bytes(4, byteorder='little'))  # 0x02 file size, counting row padding
+            file.write(int(0).to_bytes(4, byteorder='little'))  # 0x06 reserved, must be 0
+            file.write(int(0x36 + 0x100 * 4).to_bytes(4, byteorder='little'))  # 0x0a offset from file start to the pixel data
+            file.write(int(40).to_bytes(4, byteorder='little'))  # 0x0e size of the info header
+            file.write(int(self.biWidth).to_bytes(4, byteorder='little', signed=True))  # 0x12 image width
+            file.write(int(self.biHeight).to_bytes(4, byteorder='little', signed=True))  # 0x16 image height; negative for top-down images, hence signed
+            file.write(int(self.biPlanes).to_bytes(2, byteorder='little'))  # 0x1a number of colour planes
+            file.write(int(8).to_bytes(2, byteorder='little'))  # 0x1c bits per pixel
+            file.write(int(self.biCompression).to_bytes(4, byteorder='little'))  # 0x1e compression type
+            file.write(int(row_bytes * abs(self.biHeight)).to_bytes(4, byteorder='little'))  # 0x22 size of the pixel data actually written
+            file.write(int(self.biXPelsPerMeter).to_bytes(4, byteorder='little'))  # 0x26 horizontal resolution
+            file.write(int(self.biYPelsPerMeter).to_bytes(4, byteorder='little'))  # 0x2a vertical resolution
+            file.write(int(0x100).to_bytes(4, byteorder='little'))  # 0x2e number of palette entries used (256 colours, not 1024 bytes)
+            file.write(int(0).to_bytes(4, byteorder='little'))  # 0x32 number of important colour indices
+
+            for i in range(256):
+                file.write(int(i).to_bytes(1, byteorder='little'))
+                file.write(int(i).to_bytes(1, byteorder='little'))
+                file.write(int(i).to_bytes(1, byteorder='little'))
+                file.write(int(0).to_bytes(1, byteorder='little'))
+
+            for x in range(abs(self.biHeight)):
+                for y in range(self.biWidth):
+                    if self.biHeight > 0:
+                        file.write(int(image[self.biHeight - 1 - x][y]).to_bytes(1, byteorder='little'))
+                    else:
+                        file.write(int(image[x][y]).to_bytes(1, byteorder='little'))
+                file.write(b'\x00' * (row_bytes - self.biWidth))  # zero bytes up to the 4-byte row boundary
+
+
+def choosepic():
+    global path_
+    path_ = tkinter.filedialog.askopenfilename(title='请选择图片文件', filetypes=[('图片', '.bmp')])
+    if path_ == '':
+        return
+    img_temp = Image.open(path_).resize((int(256 * 0.8), int(256 * 0.8)))  # read the picture and scale it for display
+    img = ImageTk.PhotoImage(img_temp)
+    label_image1.config(image=img)
+    label_image1.image = img
+
+
+def equalize():
+    if path_ == '':
+        return
+    image = BmpData(path_)
+    # img = Image.fromarray(image.img_np.astype(np.uint8))
+    # 
img.show() + + equalized_img, hist, equalized_hist = image.equalize(8) # 分别为均衡化的图/直方图/均衡化后的直方图 + equalized_img = Image.fromarray(equalized_img.astype(np.uint8)) + # equalized_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_equalized" + new_file_name = '.'.join(name_parts) + image.save_equalized_img(new_file_name) + + equalized_img = equalized_img.resize((int(256 * 0.8), int(256 * 0.8))) + equalized_img = ImageTk.PhotoImage(equalized_img) + label_image2.config(image=equalized_img) + label_image2.image = equalized_img # 处理后的图片的显示 + + +def medianfilter(): + if path_ == '': + return + image = BmpData(path_) + medianfiltered_img = image.medianfilter() + medianfiltered_img = Image.fromarray(medianfiltered_img.astype(np.uint8)) + # medianfiltered_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_medianfiltered" + new_file_name = '.'.join(name_parts) + image.save_medianfiltered_img(new_file_name) + + medianfiltered_img = medianfiltered_img.resize((int(256 * 0.8), int(256 * 0.8))) + medianfiltered_img = ImageTk.PhotoImage(medianfiltered_img) + label_image2.config(image=medianfiltered_img) + label_image2.image = medianfiltered_img # 处理后的图片的显示 + + + +def meanfilter(): + if path_ == '': + return + image = BmpData(path_) + meanfiltered_img = image.meanfilter() + meanfiltered_img = Image.fromarray(meanfiltered_img.astype(np.uint8)) + # meanfiltered_img.show() + + name_parts = path_.split('.') + name_parts[-2] += "_meanfiltered" + new_file_name = '.'.join(name_parts) + image.save_meanfiltered_img(new_file_name) + + meanfiltered_img = meanfiltered_img.resize((int(256 * 0.8), int(256 * 0.8))) + meanfiltered_img = ImageTk.PhotoImage(meanfiltered_img) + label_image2.config(image=meanfiltered_img) + label_image2.image = meanfiltered_img # 处理后的图片的显示 + + +if __name__ == "__main__": + root = tkinter.Tk() + root.title('21281280柯劲帆') # 标题 + width, height = 600, 400 + width_max, height_max = root.maxsize() + s_center = '%dx%d+%d+%d' % (width, height, 
(width_max - width) / 2, (height_max - height) / 2) # 将页面显示在正中间 + root.geometry(s_center) + root.resizable(width=False, height=False) # 窗口不可移动 + l = tkinter.Label(root, text='实验三', width=60, height=2, fg='black', font=("微软雅黑", 16), anchor=tkinter.CENTER) + l.pack() + + label_image1 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image1.pack() + label_image1.place(x=45, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + label_image2 = tkinter.Label(root, width=int(256 * 0.8), height=int(256 * 0.8), bg='whitesmoke', anchor=tkinter.NE) + label_image2.place(x=350, y=70, width=int(256 * 0.8), height=int(256 * 0.8)) + + # 文本按钮 + Image_Input = tkinter.Button(root, text='Choose', command=choosepic) + Image_Input.place(x=50, y=300, width=90, height=30) + + # 直方图均衡化 + Fun1 = tkinter.Button(root, text='直方图均衡化', command=equalize) + Fun1.place(x=150, y=300, width=90, height=30) + + # 中值滤波 + Fun1 = tkinter.Button(root, text='中值滤波', command=medianfilter) # 添加对应按钮 + Fun1.place(x=250, y=300, width=90, height=30) + + # 均值滤波 + Fun2 = tkinter.Button(root, text='均值滤波', command=meanfilter) # 添加对应按钮 + Fun2.place(x=350, y=300, width=90, height=30) + + # 退出 + Quit = tkinter.Button(root, text='Quit', command=sys.exit) + Quit.place(x=450, y=300, width=90, height=30) + + end = tkinter.Label(root, text='21281280 柯劲帆', fg='silver', font=("微软雅黑", 10)) + end.place(x=215, y=360, width=200, height=20) + root.mainloop() +``` + diff --git a/Lab/Lab3/test_images/my_image_gaussiannoise.bmp b/Lab/Lab3/test_images/my_image_gaussiannoise.bmp new file mode 100644 index 0000000..67a7d91 Binary files /dev/null and b/Lab/Lab3/test_images/my_image_gaussiannoise.bmp differ diff --git a/Lab/Lab3/test_images/my_image_gaussiannoise_meanfiltered.bmp b/Lab/Lab3/test_images/my_image_gaussiannoise_meanfiltered.bmp new file mode 100644 index 0000000..44ee42e Binary files /dev/null and b/Lab/Lab3/test_images/my_image_gaussiannoise_meanfiltered.bmp differ 
diff --git a/Lab/Lab3/test_images/my_image_gaussiannoise_medianfiltered.bmp b/Lab/Lab3/test_images/my_image_gaussiannoise_medianfiltered.bmp new file mode 100644 index 0000000..db8adaf Binary files /dev/null and b/Lab/Lab3/test_images/my_image_gaussiannoise_medianfiltered.bmp differ diff --git a/Lab/Lab3/test_images/my_image_normal.bmp b/Lab/Lab3/test_images/my_image_normal.bmp new file mode 100644 index 0000000..04a62fa Binary files /dev/null and b/Lab/Lab3/test_images/my_image_normal.bmp differ diff --git a/Lab/Lab3/test_images/my_image_normal_meanfiltered.bmp b/Lab/Lab3/test_images/my_image_normal_meanfiltered.bmp new file mode 100644 index 0000000..2d75607 Binary files /dev/null and b/Lab/Lab3/test_images/my_image_normal_meanfiltered.bmp differ diff --git a/Lab/Lab3/test_images/my_image_normal_medianfiltered.bmp b/Lab/Lab3/test_images/my_image_normal_medianfiltered.bmp new file mode 100644 index 0000000..1471c57 Binary files /dev/null and b/Lab/Lab3/test_images/my_image_normal_medianfiltered.bmp differ diff --git a/Lab/Lab3/test_images/my_image_saltnoise.bmp b/Lab/Lab3/test_images/my_image_saltnoise.bmp new file mode 100644 index 0000000..7a323b5 Binary files /dev/null and b/Lab/Lab3/test_images/my_image_saltnoise.bmp differ diff --git a/Lab/Lab3/test_images/my_image_saltnoise_meanfiltered.bmp b/Lab/Lab3/test_images/my_image_saltnoise_meanfiltered.bmp new file mode 100644 index 0000000..a42b690 Binary files /dev/null and b/Lab/Lab3/test_images/my_image_saltnoise_meanfiltered.bmp differ diff --git a/Lab/Lab3/test_images/my_image_saltnoise_medianfiltered.bmp b/Lab/Lab3/test_images/my_image_saltnoise_medianfiltered.bmp new file mode 100644 index 0000000..5b729a7 Binary files /dev/null and b/Lab/Lab3/test_images/my_image_saltnoise_medianfiltered.bmp differ diff --git a/Lab/Lab3/实验3-柯劲帆-21281280.pdf b/Lab/Lab3/实验3-柯劲帆-21281280.pdf new file mode 100644 index 0000000..a8765bd Binary files /dev/null and b/Lab/Lab3/实验3-柯劲帆-21281280.pdf differ diff --git 
a/Lab/Lab4/code/tune-clip-in-cub/get_loader.py b/Lab/Lab4/code/tune-clip-in-cub/get_loader.py
new file mode 100644
index 0000000..202105f
--- /dev/null
+++ b/Lab/Lab4/code/tune-clip-in-cub/get_loader.py
@@ -0,0 +1,61 @@
+from PIL import Image
+from torch.utils.data import Dataset
+import os
+import clip
+
+
+class Classes:
+    """Two-way lookup between 0-based class indices and 'A photo of <name>' prompts."""
+    def __init__(self, classes_file):
+        # Each row is split as "<index> <prefix>.<name>"; the stored index is index - 1.
+        self.class2index = {}
+        self.index2class = {}
+        with open(classes_file) as class_file:  # context manager closes the handle
+            classes = [line.strip() for line in class_file]
+        for row in classes:
+            index, birdname = row.split(' ')
+            index = int(index)
+            birdname = (birdname.split('.'))[1].replace('_', ' ')
+            self.class2index['A photo of ' + birdname] = index - 1
+            self.index2class[index - 1] = 'A photo of ' + birdname
+    def __len__(self):
+        return len(self.class2index)
+    def get_class(self, num: int):
+        return self.index2class[num] if (num in self.index2class) else None
+    def get_id(self, class_name: str):
+        return (
+            self.class2index[class_name] if (class_name in self.class2index) else None
+        )
+
+
+class MyDataset(Dataset):
+    """Train or test split of the CUB images; items are (image, prompt tokens, class index)."""
+    def __init__(self, processor, train=True):
+        classes = Classes('/home/kejingfan/cub/classes.txt')
+        class_list = [classes.get_class(i) for i in range(len(classes))]
+        self.tokens = clip.tokenize(class_list)
+
+        self.img_process = processor
+        self.root_dir = '/home/kejingfan/cub/images'
+        # The three index files are consumed in parallel: row i of each describes image i.
+        with open('/home/kejingfan/cub/images.txt') as images_file:
+            images_list = [line.strip().split(' ')[1] for line in images_file]
+        self.images = []
+        with open('/home/kejingfan/cub/image_class_labels.txt') as labels_file:
+            labels = [int(line.strip().split(' ')[1]) for line in labels_file]
+        with open('/home/kejingfan/cub/train_test_split.txt') as split_file:
+            is_train = [line.strip().split(' ')[1] == '1' for line in split_file]
+        for index in range(len(images_list)):
+            class_id = labels[index]
+            if (train and is_train[index]) or (not train and not is_train[index]):
+                self.images.append([os.path.join(self.root_dir, images_list[index]), int(class_id) - 1])
+
+    def __len__(self):
+        return len(self.images)
+
+    def __getitem__(self, index):
+        image, target = self.images[index]
+        token = self.tokens[target]
+        image = Image.open(image).convert("RGB")
+        image = self.img_process(image)
+        return image, token, target
diff --git a/Lab/Lab4/code/tune-clip-in-cub/test.py b/Lab/Lab4/code/tune-clip-in-cub/test.py
new file mode 100644
index 0000000..9225832
--- /dev/null
+++ b/Lab/Lab4/code/tune-clip-in-cub/test.py
@@ -0,0 +1,23 @@
+import torch
+import torch.nn
+import clip
+import numpy as np
+from tqdm import tqdm
+
+from get_loader import Classes
+
+
+def test(net, test_dataset, test_loader, device):
+    """Return top-1 accuracy over test_dataset, scoring each image against every class prompt; net is left in train mode."""
+    net.eval()
+    total_accuracy = 0.0
+    texts = test_dataset.tokens.to(device)
+    with torch.no_grad():
+        for index, (images, tokens, targets) in tqdm(enumerate(test_loader), total=len(test_loader)):
+            images = images.to(device)
+            logits_per_image, logits_per_text = net(images, texts)
+            probs = logits_per_image.softmax(dim=-1).cpu().numpy()
+            accuracy = np.sum(probs.argmax(1) == targets.numpy())
+            total_accuracy += accuracy
+    net.train()
+    return total_accuracy / len(test_dataset)
diff --git a/Lab/Lab4/code/tune-clip-in-cub/train.py b/Lab/Lab4/code/tune-clip-in-cub/train.py
new file mode 100644
index 0000000..4bc9a8e
--- /dev/null
+++ b/Lab/Lab4/code/tune-clip-in-cub/train.py
@@ -0,0 +1,80 @@
+import os
+import torch
+from torch import nn, optim
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+import clip
+
+from get_loader import MyDataset
+from test import test
+
+
+def convert_models_to_fp32(model):
+    """Cast every parameter, and its gradient where one exists, to float32 in place."""
+    for p in model.parameters():
+        p.data = p.data.float()
+        if p.grad is not None:  # parameters that received no gradient have grad == None
+            p.grad.data = p.grad.data.float()
+
+
+def train():
+    """Fine-tune CLIP ViT-L/14 with the symmetric image/text loss; evaluate and checkpoint after every epoch."""
+    batch_size = 64
+    learning_rate = 1e-6
+    num_epochs = 500
+
+    device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
+    net, preprocess = clip.load("ViT-L/14", device=device, jit=False)
+    # A torch.device compared with the str 'cpu' is False, so test the device type instead.
+    on_cpu = device.type == 'cpu'
+
+    if on_cpu:
+        net.float()
+    else:
+        clip.model.convert_weights(net)
+
+    loss_img = nn.CrossEntropyLoss()
+    loss_txt = nn.CrossEntropyLoss()
+
+    optimizer = optim.Adam(net.parameters(), lr=learning_rate, betas=(0.9, 0.98), eps=1e-6, weight_decay=0.2)
+
+    train_dataset = MyDataset(processor=preprocess, train=True)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=64, pin_memory=True)
+    test_dataset = MyDataset(processor=preprocess, train=False)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=64, shuffle=True, pin_memory=True)
+
+    print(f'Train dataset size: {len(train_dataset)}\nTest dataset size: {len(test_dataset)}\n')
+    os.makedirs("model_checkpoint", exist_ok=True)  # torch.save below does not create the directory
+
+    for epoch in range(num_epochs):
+        total_epoch_loss = 0
+        for index, (images, tokens, targets) in tqdm(enumerate(train_loader), total=len(train_loader)):
+            optimizer.zero_grad()
+            images = images.to(device)
+            tokens = tokens.to(device)
+            with torch.set_grad_enabled(True):
+                logits_per_image, logits_per_text = net(images, tokens)
+                ground_truth = torch.arange(len(images), dtype=torch.long, device=device)
+                cur_loss = (loss_img(logits_per_image, ground_truth) + loss_txt(logits_per_text, ground_truth)) / 2
+                total_epoch_loss += cur_loss.item()
+                cur_loss.backward()
+
+                if on_cpu:
+                    optimizer.step()
+                else:
+                    convert_models_to_fp32(net)
+                    optimizer.step()
+                    clip.model.convert_weights(net)
+
+        test_acc = test(net, test_dataset, test_loader, device)
+        print(f'Total train loss: {total_epoch_loss:.6f}, Test accuracy: {test_acc:.6%}')
+        print("--------------------------------------------------------------")
+        torch.save({'epoch': epoch,
+                    'model_state_dict': net.state_dict(),
+                    'optimizer_state_dict': optimizer.state_dict(),
+                    'loss': total_epoch_loss,
+                    }, f"model_checkpoint/model-{epoch + 1}_acc-{test_acc*100:.3f}.pt")
+
+
+if __name__ == "__main__":
+    train()
diff --git a/Lab/Lab4/code/tune-clip-in-stanford_cars/get_loader.py b/Lab/Lab4/code/tune-clip-in-stanford_cars/get_loader.py
new file mode 100644
index 0000000..8912731
--- /dev/null
+++ b/Lab/Lab4/code/tune-clip-in-stanford_cars/get_loader.py
@@ -0,0 +1,53 @@
+from PIL import Image
+from torch.utils.data import Dataset
+import os
+import pandas as pd
+import clip
+
+
+class Classes:
+    """Two-way lookup between CSV row indices and 'A photo of <class_names>' prompts."""
+    def __init__(self, classes_file):
+        self.class2index = {}
+        self.index2class = {}
+        classes = pd.read_csv(classes_file)
+        for index, row in classes.iterrows():
+            carname = row['class_names']
+            self.class2index['A photo of ' + carname] = index
+            self.index2class[index] = 'A photo of ' + carname
+    def __len__(self):
+        return len(self.class2index)
+    def get_class(self, num: int):
+        return self.index2class[num] if (num in self.index2class) else None
+    def get_id(self, class_name: str):
+        return (
+            self.class2index[class_name] if (class_name in self.class2index) else None
+        )
+
+
+class MyDataset(Dataset):
+    """Train or test split of the car images; items are (image, prompt tokens, class index)."""
+    def __init__(self, processor, train=True):
+        classes = Classes('/home/kejingfan/cars/class_names.csv')
+        class_list = [classes.get_class(i) for i in range(len(classes))]
+        self.tokens = clip.tokenize(class_list)
+
+        self.img_process = processor
+        self.root_dir = '/home/kejingfan/cars' + ('/cars_' + ('train' if train else 'test')) * 2
+        train_annos_file = '/home/kejingfan/cars/cars_train_annos.csv'
+        test_annos_file = '/home/kejingfan/cars/cars_test_annos_withlabels.csv'
+        images_list = pd.read_csv(train_annos_file if train else test_annos_file)
+        self.images = []
+        for index, row in images_list.iterrows():
+            class_id = int(row['class'])
+            self.images.append([os.path.join(self.root_dir, row['fname']), class_id - 1])
+
+    def __len__(self):
+        return len(self.images)
+
+    def __getitem__(self, index):
+        image, target = self.images[index]
+        token = self.tokens[target]
+        image = Image.open(image).convert("RGB")
+        image = self.img_process(image)
+        return image, token, target
\ No newline at end of file
diff --git a/Lab/Lab4/code/tune-clip-in-stanford_cars/test.py b/Lab/Lab4/code/tune-clip-in-stanford_cars/test.py
new file mode 100644
index 0000000..29d3911
--- /dev/null
+++ b/Lab/Lab4/code/tune-clip-in-stanford_cars/test.py
@@ -0,0 +1,21 @@
+import torch
+import torch.nn
+import clip
+import numpy as np
+from tqdm import tqdm
+
+
+def test(net, test_dataset, test_loader, device):
+    """Return top-1 accuracy over test_dataset, scoring each image against every class prompt; net is left in train mode."""
+    net.eval()
+    total_accuracy = 0.0
+    texts = test_dataset.tokens.to(device)
+    with torch.no_grad():
+        for index, (images, tokens, targets) in tqdm(enumerate(test_loader), total=len(test_loader)):
+            images = images.to(device)
+            logits_per_image, logits_per_text = net(images, texts)
+            probs = logits_per_image.softmax(dim=-1).cpu().numpy()
+            accuracy = np.sum(probs.argmax(1) == targets.numpy())
+            total_accuracy += accuracy
+    net.train()
+    return total_accuracy / len(test_dataset)
\ No newline at end of file
diff --git a/Lab/Lab4/code/tune-clip-in-stanford_cars/train.py b/Lab/Lab4/code/tune-clip-in-stanford_cars/train.py
new file mode 100644
index 0000000..f325955
--- /dev/null
+++ b/Lab/Lab4/code/tune-clip-in-stanford_cars/train.py
@@ -0,0 +1,80 @@
+import os
+import torch
+from torch import nn, optim
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+import clip
+
+from get_loader import MyDataset
+from test import test
+
+
+def convert_models_to_fp32(model):
+    """Cast every parameter, and its gradient where one exists, to float32 in place."""
+    for p in model.parameters():
+        p.data = p.data.float()
+        if p.grad is not None:  # parameters that received no gradient have grad == None
+            p.grad.data = p.grad.data.float()
+
+
+def train():
+    """Fine-tune CLIP ViT-L/14 with the symmetric image/text loss; evaluate and checkpoint after every epoch."""
+    batch_size = 64
+    learning_rate = 1e-6
+    num_epochs = 500
+
+    device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
+    net, preprocess = clip.load("ViT-L/14", device=device, jit=False)
+    # A torch.device compared with the str 'cpu' is False, so test the device type instead.
+    on_cpu = device.type == 'cpu'
+
+    if on_cpu:
+        net.float()
+    else:
+        clip.model.convert_weights(net)
+
+    loss_img = nn.CrossEntropyLoss()
+    loss_txt = nn.CrossEntropyLoss()
+
+    optimizer = optim.Adam(net.parameters(), lr=learning_rate, betas=(0.9, 0.98), eps=1e-6, weight_decay=0.2)
+
+    train_dataset = MyDataset(processor=preprocess, train=True)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=64, pin_memory=True)
+    test_dataset = MyDataset(processor=preprocess, train=False)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=64, shuffle=True, pin_memory=True)
+
+    print(f'Train dataset size: {len(train_dataset)}\nTest dataset size: {len(test_dataset)}\n')
+    os.makedirs("model_checkpoint", exist_ok=True)  # torch.save below does not create the directory
+
+    for epoch in range(num_epochs):
+        total_epoch_loss = 0
+        for index, (images, tokens, targets) in tqdm(enumerate(train_loader), total=len(train_loader)):
+            optimizer.zero_grad()
+            images = images.to(device)
+            tokens = tokens.to(device)
+            with torch.set_grad_enabled(True):
+                logits_per_image, logits_per_text = net(images, tokens)
+                ground_truth = torch.arange(len(images), dtype=torch.long, device=device)
+                cur_loss = (loss_img(logits_per_image, ground_truth) + loss_txt(logits_per_text, ground_truth)) / 2
+                total_epoch_loss += cur_loss.item()
+                cur_loss.backward()
+
+                if on_cpu:
+                    optimizer.step()
+                else:
+                    convert_models_to_fp32(net)
+                    optimizer.step()
+                    clip.model.convert_weights(net)
+
+        test_acc = test(net, test_dataset, test_loader, device)
+        print(f'Total train loss: {total_epoch_loss:.6f}, Test accuracy: {test_acc:.6%}')
+        print("--------------------------------------------------------------")
+        torch.save({'epoch': epoch,
+                    'model_state_dict': net.state_dict(),
+                    'optimizer_state_dict': optimizer.state_dict(),
+                    'loss': total_epoch_loss,
+                    }, f"model_checkpoint/model-{epoch + 1}_acc-{test_acc*100:.3f}.pt")
+
+
+if __name__ == "__main__":
+    train()
diff --git a/Lab/Lab4/source/CLIP.png b/Lab/Lab4/source/CLIP.png
new file mode 100644
index 0000000..a1b5ec9
Binary files /dev/null and b/Lab/Lab4/source/CLIP.png differ
diff --git a/Lab/Lab4/source/car_acc.png b/Lab/Lab4/source/car_acc.png
new file mode 100644
index 0000000..92c6cad
Binary files /dev/null and b/Lab/Lab4/source/car_acc.png differ
diff --git a/Lab/Lab4/source/cub_acc.png b/Lab/Lab4/source/cub_acc.png
new file mode 100644
index 0000000..b1064a6
Binary files /dev/null and b/Lab/Lab4/source/cub_acc.png differ
diff --git 
a/Lab/Lab4/source/实验4-柯劲帆21281280&李桦炅21281282.md b/Lab/Lab4/source/实验4-柯劲帆21281280&李桦炅21281282.md new file mode 100644 index 0000000..67b05f4 --- /dev/null +++ b/Lab/Lab4/source/实验4-柯劲帆21281280&李桦炅21281282.md @@ -0,0 +1,367 @@ +

实验报告

+ +
+
课程名称:数字图像处理
+
实验题目:自选课题-CLIP图片分类任务复现
+
姓名学号:柯劲帆21281280; 李桦炅21281282
+
班级:物联网2101班
+
指导老师:安高云
+
报告日期:2024年1月10日
+
+ + + +--- + + + +**目录** + +[TOC] + +--- + + + +# 0. 报告摘要 + +本实验的主要工作是复现CLIP图片分类模型,使用CLIP在两个细粒度分类数据集上进行了finetune和测试,采用预训练Vision Transformer作为图片特征提取器,均实现了较高正确率的图片分类,验证了CLIP的图片分类功能在细粒度分类数据上的有效性。 + + + +| 小组成员名字 | 小组成员学号 | 工作贡献占比 | +| ------------ | ------------ | ------------ | +| 柯劲帆 | 21281280 | 70% | +| 李桦炅 | 21281282 | 30% | + + + +# 1. 论文解读 + +CLIP是OpenAI在2021年提出的一种深度学习图片分类方法。 + +CLIP基本算法原理相对比较简单: + +1. 为了对图片和文本建立联系,首先分别对图片和文本进行特征提取。图片特征提取的backbone可以是Resnet系列模型也可以是VIT系列模型,文本特征提取一般采用Bert模型; +2. 特征提取之后,进行归一化,然后直接相乘来计算余弦距离,同一图片-文本对的结果趋近于1,不同图片-文本对的结果趋近于0,采用对比损失计算loss。这种计算loss方式效果与batch size有很大关系,一般需要比较大的batch size才能有效果。 + +模型图如下: + +![CLIP](CLIP.png) + +伪代码: + +```python +# image_encoder - ResNet or Vision Transformer +# text_encoder - CBOW or Text Transformer +# I[n, h, w, c] - minibatch of aligned images +# T[n, l] - minibatch of aligned texts +# W_i[d_i, d_e] - learned proj of image to embed +# W_t[d_t, d_e] - learned proj of text to embed +# t - learned temperature parameter + +# extract feature representations of each modality +I_f = image_encoder(I) #[n, d_i] +T_f = text_encoder(T) #[n, d_t] + +# joint multimodal embedding [n, d_e] +I_e = l2_normalize(np.dot(I_f, W_i), axis=1) +T_e = l2_normalize(np.dot(T_f, W_t), axis=1) + +# scaled pairwise cosine similarities [n, n] +logits = np.dot(I_e, T_e.T) * np.exp(t) + +# symmetric loss function +labels = np.arange(n) +loss_i = cross_entropy_loss(logits, labels, axis=0) +loss_t = cross_entropy_loss(logits, labels, axis=1) +loss = (loss_i + loss_t)/2 +``` + + + +# 2. 实验过程 + +本次实验我们复现了CLIP,在两个公开的数据集中对CLIP进行finetune,验证其正确率。 + +## 2.1. 实验环境 + +- NVIDIA A40服务器 + +## 2.2. 数据集下载 + +首先我们下载了用于finetune的两个数据集: + +- [Caltech-UCSD Birds-200-2011 (CUB-200-2011)](https://paperswithcode.com/dataset/cub-200-2011) +- [Stanford Cars](https://paperswithcode.com/dataset/stanford-cars) + +都是细粒度的图片分类数据集。 + +## 2.3. finetune代码 + +### 2.3.1. 
数据集 + +在本任务中,数据为图片-文本对,因此需要对分类的下标和名字做一个映射,我们使用一个类实现: + +```python +# get_loader.py + +class Classes: + def __init__(self, classes_file): + self.class2index = {} + self.index2class = {} + classes = pd.read_csv(classes_file) + for index, row in classes.iterrows(): + carname = row['class_names'] + self.class2index['A photo of ' + carname] = index + self.index2class[index] = 'A photo of ' + carname + def __len__(self): + return len(self.class2index) + def get_class(self, num: int): + return self.index2class[num] if (num in self.index2class) else None + def get_id(self, class_name: str): + return ( + self.class2index[class_name] if (class_name in self.class2index) else None + ) +``` + +然后对本地的数据集进行读入。 + +两个数据集的存储形式不同: + +- `CUB-200-2011`将训练集和测试集放在同一个文件夹中,以不同类别分文件夹存储,并使用一个表格文件存储图片名称的编号、一个表格存储图片编号的标签、一个表格文件存储图片编号对应的是训练/测试集; +- `Stanford Cars`将训练集和测试集分别放在不同的文件夹里,使用两个表格文件分别存储训练/测试集图片名称编号对应的标签。 + +因此,自定义`MyDataset`类需要针对不同数据集实现不同的读取逻辑。 + +读取`CUB-200-2011`的代码为: + +```python +# get_loader.py + +import clip +from PIL import Image +from torch.utils.data import Dataset +import os + + +class MyDataset(Dataset): + def __init__(self, processor, train=True): + classes = Classes('/home/kejingfan/cub/classes.txt') + class_list = [classes.get_class(i) for i in range(len(classes))] + self.tokens = clip.tokenize(class_list) # 对文本进行tokenize + + self.img_process = processor + # 从表格中获取整个数据集的图片列表 + self.root_dir = '/home/kejingfan/cub/images' + images_list = open('/home/kejingfan/cub/images.txt').readlines() + images_list = [line.strip().split(' ')[1] for line in images_list] + self.images = [] + # 从表格中获取图片对应的标签 + labels_file = open('/home/kejingfan/cub/image_class_labels.txt').readlines() + labels = [int(line.strip().split(' ')[1]) for line in labels_file] + # 从表格中获取图片对应的数据集 + train_test_split_file = open('/home/kejingfan/cub/train_test_split.txt').readlines() + is_train = [line.strip().split(' ')[1] == '1' for line in train_test_split_file] + for index in range(len(images_list)): # 
将对应数据集的图片放入列表中 + class_id = labels[index] + if (train and is_train[index]) or (not train and not is_train[index]): + self.images.append([os.path.join(self.root_dir, images_list[index]), int(class_id) - 1]) + + def __len__(self): + return len(self.images) + + def __getitem__(self, index): + image, target = self.images[index] + token = self.tokens[target] + image = Image.open(image).convert("RGB") + image = self.img_process(image) # 图片预处理 + return image, token, target +``` + +读取`Stanford Cars`的代码仅在`__init__()`中与读取`CUB-200-2011`的代码有区别。`__init__()`如下: + +```python +def __init__(self, processor, train=True): + classes = Classes('/home/kejingfan/cars/class_names.csv') + class_list = [classes.get_class(i) for i in range(len(classes))] + self.tokens = clip.tokenize(class_list) # 对文本进行tokenize + + self.img_process = processor + # 选择相应数据集的文件夹 + self.root_dir = '/home/kejingfan/cars' + ('/cars_' + ('train' if train else 'test')) * 2 + # 选择相应数据集的标签 + train_annos_file = '/home/kejingfan/cars/cars_train_annos.csv' + test_annos_file = '/home/kejingfan/cars/cars_test_annos_withlabels.csv' + images_list = pd.read_csv(train_annos_file if train else test_annos_file) + self.images = [] + for index, row in images_list.iterrows(): # 将对应数据集的图片放入列表中 + class_id = int(row['class']) + self.images.append([os.path.join(self.root_dir, row['fname']), class_id - 1]) +``` + +### 2.3.2. 
测试 + +用于判断训练的效果和进度。 + +```python +# test.py + +import torch +import torch.nn +import clip +from PIL import Image +import argparse +import numpy as np +from tqdm import tqdm +from get_loader import Classes + +def test(net, test_dataset, test_loader, device): + net.eval() + total_accuracy = 0.0 + texts = test_dataset.tokens.to(device) + with torch.no_grad(): + for index, (images, tokens, targets) in tqdm(enumerate(test_loader), total=len(test_loader)): + images = images.to(device) + logits_per_image, logits_per_text = net(images, texts) + probs = logits_per_image.softmax(dim=-1).cpu().numpy() + accuracy = np.sum(probs.argmax(1) == targets.numpy()) + total_accuracy += accuracy + net.train() + return total_accuracy / len(test_dataset) +``` + +### 2.3.3. 训练 + +超参数设置为: + +- batch_size = $64$ +- learning_rate = $10^{-6}$ +- Adam优化器 + +代码如下: + +```python +# train.py + +import torch +from torch import nn, optim +from torch.utils.data import DataLoader +from tqdm import tqdm +import clip + +from get_loader import MyDataset +from test import test + + +def convert_models_to_fp32(model): + for p in model.parameters(): + p.data = p.data.float() + p.grad.data = p.grad.data.float() + + +def train(): + batch_size = 64 + learning_rate = 1e-6 + num_epochs = 500 + + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + net, preprocess = clip.load("ViT-L/14", device=device, jit=False) + + if device == 'cpu': + net.float() + else: + clip.model.convert_weights(net) + + loss_img = nn.CrossEntropyLoss() + loss_txt = nn.CrossEntropyLoss() + + optimizer = optim.Adam(net.parameters(), lr=learning_rate, betas=(0.9, 0.98), eps=1e-6, weight_decay=0.2) + + train_dateset = MyDataset(processor=preprocess, train=True) + train_loader = DataLoader(train_dateset, batch_size=batch_size, shuffle=True, num_workers=64, pin_memory=True) + test_dataset = MyDataset(processor=preprocess, train=False) + test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=64, 
shuffle=True, pin_memory=True) + + print(f'Train dataset size: {len(train_dateset)}\nTest dataset size: {len(test_dataset)}\n') + + for epoch in range(num_epochs): + total_epoch_loss = 0 + for index, (images, tokens, targets) in tqdm(enumerate(train_loader), total=len(train_loader)): + optimizer.zero_grad() + images = images.to(device) + tokens = tokens.to(device) + with torch.set_grad_enabled(True): + logits_per_image, logits_per_text = net(images, tokens) + ground_truth = torch.arange(len(images), dtype=torch.long, device=device) + cur_loss = (loss_img(logits_per_image, ground_truth) + loss_txt(logits_per_text, ground_truth)) / 2 + total_epoch_loss += cur_loss.item() + cur_loss.backward() + + if device == 'cpu': + optimizer.step() + else: + convert_models_to_fp32(net) + optimizer.step() + clip.model.convert_weights(net) + + test_acc = test(net, test_dataset, test_loader, device) + print(f'Total train loss: {total_epoch_loss:.6f}, Test accuracy: {test_acc:.6%}') + print("----------------------------------------------------------") + torch.save({'epoch': epoch, + 'model_state_dict': net.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': total_epoch_loss, + }, f"model_checkpoint/model-{epoch + 1}_acc-{test_acc*100:.3f}.pt") + + +if __name__ == "__main__": + train() +``` + +## 2.4. 运行过程及结果 + +```sh +$ conda install --yes -c pytorch pytorch=1.7.1 torchvision cudatoolkit=11.0 +$ pip install ftfy regex tqdm +$ pip install git+https://github.com/openai/CLIP.git +``` + +依次运行上述命令,环境配置完成。 + +运行代码。 + +```sh +$ python train.py +``` + +在两个数据集上得到以下结果: + + + + + + +
cub_acccar_acc
+ +| 数据集 | finetune的epoch数 | 第一个epoch的正确率 | 最高正确率 | +| ------------- | ----------------- | ------------------- | ---------- | +| CUB-200-2011 | $61$ | $66.690\%$ | $84.398\%$ | +| Stanford Cars | $30$ | $78.995\%$ | $88.820\%$ | + +可见CLIP在分类任务中达到了非常好的效果。 + + + +# 3. 心得体会 + +在本实验中,我们复现了CLIP图片分类模型,并在`CUB-200-2011`和`Stanford Cars`两个数据集上进行了训练和测试。 + +通过本次实验,我们体会到: + +1. CLIP是一个非常强大的视觉语言模型,能够在零样本下进行分类。它结合了图像模型提取的视觉特征和文本模型提取的语义特征,通过单模态和跨模态对比损失进行联合训练。 +2. 通过finetune,CLIP可以很好地适应特定的图像分类任务,并取得非常高的分类准确率。这验证了CLIP作为预训练模型的强大迁移能力。 +3. 实验中,我们体会到了如何准备图像分类数据集,如何设计训练和测试代码,如何配置模型超参数等实际开发中的经验。这些都对我们今后独立开发图像分类项目具有很好的指导意义。 +4. 整个实验过程顺利,达到了复现CLIP在具体图像分类任务上的强大性能的目的。让我们对视觉语言预训练模型有了更直观的理解。 + +通过这个实验,我们对深度学习在计算机视觉领域的应用有了进一步的理解,掌握了实际的开发调试经验。 diff --git a/Lab/Lab4/实验4-柯劲帆21281280&李桦炅21281282.pdf b/Lab/Lab4/实验4-柯劲帆21281280&李桦炅21281282.pdf new file mode 100644 index 0000000..71b878b Binary files /dev/null and b/Lab/Lab4/实验4-柯劲帆21281280&李桦炅21281282.pdf differ diff --git a/Lab/requirements/数字图像处理实验指导.pdf b/Lab/requirements/数字图像处理实验指导.pdf new file mode 100644 index 0000000..0a5a3e3 Binary files /dev/null and b/Lab/requirements/数字图像处理实验指导.pdf differ diff --git a/Lab/requirements/附件材料/.DS_Store b/Lab/requirements/附件材料/.DS_Store new file mode 100644 index 0000000..f9bdd24 Binary files /dev/null and b/Lab/requirements/附件材料/.DS_Store differ diff --git a/Lab/requirements/附件材料/实验1附件/.DS_Store b/Lab/requirements/附件材料/实验1附件/.DS_Store new file mode 100644 index 0000000..15f3e4b Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/.DS_Store differ diff --git a/Lab/requirements/附件材料/实验1附件/DIPDemo.exe b/Lab/requirements/附件材料/实验1附件/DIPDemo.exe new file mode 100644 index 0000000..1a9c633 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/DIPDemo.exe differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/.DS_Store b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/.DS_Store differ diff 
--git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/2010123018103752.jpg b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/2010123018103752.jpg new file mode 100644 index 0000000..0fe7341 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/2010123018103752.jpg differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Boy.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Boy.bmp new file mode 100644 index 0000000..2b2f1bf Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Boy.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Couple.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Couple.bmp new file mode 100644 index 0000000..86290c4 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Couple.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Girl.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Girl.bmp new file mode 100644 index 0000000..dd6813e Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Girl.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Mary.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Mary.bmp new file mode 100644 index 0000000..4ee3be5 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Mary.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Miss.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Miss.bmp new file mode 100644 index 0000000..68dc9b2 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Miss.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Miss.raw.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Miss.raw.bmp new file mode 100644 index 0000000..68dc9b2 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Miss.raw.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(jiaoyan).bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(jiaoyan).bmp new file mode 100644 index 0000000..819b897 Binary files /dev/null and 
b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(jiaoyan).bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(junzhi).bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(junzhi).bmp new file mode 100644 index 0000000..48dd966 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(junzhi).bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(sui).bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(sui).bmp new file mode 100644 index 0000000..e60137b Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(sui).bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(zhongzhi).bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(zhongzhi).bmp new file mode 100644 index 0000000..834d710 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(zhongzhi).bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(zhongzhiF).bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(zhongzhiF).bmp new file mode 100644 index 0000000..956e99d Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda(zhongzhiF).bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda.bmp new file mode 100644 index 0000000..ed2482e Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda.raw.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda.raw.bmp new file mode 100644 index 0000000..ed2482e Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda.raw.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda1.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda1.bmp new file mode 100644 index 0000000..1723fba Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda1.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda2.bmp 
b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda2.bmp new file mode 100644 index 0000000..5063af8 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda2.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda3.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda3.bmp new file mode 100644 index 0000000..7444ae9 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda3.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda4.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda4.bmp new file mode 100644 index 0000000..c2e05b4 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda4.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda5.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda5.bmp new file mode 100644 index 0000000..a5f2521 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda5.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda6.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda6.bmp new file mode 100644 index 0000000..008fb73 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda6.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda7.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda7.bmp new file mode 100644 index 0000000..9c8a0e1 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda7.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda8.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda8.bmp new file mode 100644 index 0000000..8cb92b7 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/Panda8.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/gray.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/gray.bmp new file mode 100644 index 0000000..8ae41e6 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/gray.bmp differ diff --git 
a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/junheng.bmp b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/junheng.bmp new file mode 100644 index 0000000..58f01ff Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/junheng.bmp differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/lena1.jpg b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/lena1.jpg new file mode 100644 index 0000000..9bd97f8 Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/lena1.jpg differ diff --git a/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/u=3269486150,4145897387&fm=0&gp=0.jpg b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/u=3269486150,4145897387&fm=0&gp=0.jpg new file mode 100644 index 0000000..90e4ecf Binary files /dev/null and b/Lab/requirements/附件材料/实验1附件/图像处理中的常用标准图像/u=3269486150,4145897387&fm=0&gp=0.jpg differ diff --git a/Lab/requirements/附件材料/实验2附件.pdf b/Lab/requirements/附件材料/实验2附件.pdf new file mode 100644 index 0000000..3ca492d Binary files /dev/null and b/Lab/requirements/附件材料/实验2附件.pdf differ diff --git a/Lab/requirements/附件材料/实验4附件.ppt b/Lab/requirements/附件材料/实验4附件.ppt new file mode 100644 index 0000000..20c57dd Binary files /dev/null and b/Lab/requirements/附件材料/实验4附件.ppt differ