卷积神经网络学习经验可视化

人们常说神经网络的解释性不强,即神经网络模型是一个“黑盒”,它学到的经验很难用人类可以理解的方式呈现(反例是树模型,可解释性强)。这种说法不完全正确,卷积神经网络学习到的“经验”就非常适合可视化,因为很大程度上它们是视觉概念的表示。

可视化卷积核

卷积神经网络会学习识别各种空间模式。我们可以通过查看构成每个卷积核的权重,并将这些权重以图像方式呈现,来可视化每个卷积层已被训练识别的内容。这种技术称为可视化卷积核,它对于理解CNN的内部工作方式很有帮助。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import torch
from torch import nn
from matplotlib import pyplot as plt

#========================================================
# 模型结构设计
#========================================================

class LeNet5(nn.Module):
    """LeNet-5 convolutional network.

    Input: grayscale images of shape (N, 1, 28, 28).
    Output: logits for 10 classes, shape (N, 10).
    """

    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2, bias=False),
            # Spatial size after conv: (28 + 2*2 - 5)/1 + 1 = 28
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            # Spatial size after pooling: (28 - 2)/2 + 1 = 14
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, bias=False),
            # Spatial size after conv: (14 - 5)/1 + 1 = 10
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            # Spatial size after pooling: (10 - 2)/2 + 1 = 5
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.Tanh(),
            nn.Linear(120, 84),
            nn.Tanh(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # nn.Linear expects a 2-D (batch, features) tensor, so flatten
        # the (N, 16, 5, 5) feature maps before the classifier.
        x = x.view(x.size(0), 16 * 5 * 5)
        # x = x.view(x.size()[0], -1)
        x = self.classifier(x)
        return x

#========================================================
# 提取已训练的卷积核参数
#========================================================

# Load the trained weights into a fresh LeNet-5 instance.
net = LeNet5()
net.load_state_dict(torch.load("./model/net_best.pth"))

# Weights of the Conv2d module inside conv2; shape is (16, 6, 5, 5):
# 16 output kernels, each with 6 input channels of size 5x5.
weights = net.conv2._modules['0'].weight.data
w = weights.numpy()

# Display the first 10 of the 16 conv2 kernels (input channel 0 of each)
# on a 2-row by 5-column grid.
fig = plt.figure(figsize=(20, 8))
columns = 5
rows = 2
for i in range(columns * rows):
    fig.add_subplot(rows, columns, i + 1)
    plt.imshow(w[i][0], cmap='gray')
print('Second convolutional layer')
plt.show()

可视化特征图(中间层)

可视化特征图(中间层)可以让我们真正了解每个滤波器学习到的是什么。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import cv2
import numpy as np
import torch
from torch.autograd import Variable
from torchvision import models

class LayerVisualization():
    '''
    Feature-map (intermediate-layer) visualization for a pretrained VGG16.
    '''

    def __init__(self, img_path, selected_layer):
        '''
        INPUT --> path to the image file, index of the layer to visualize
        '''
        self.img_path = img_path
        self.selected_layer = selected_layer
        # Convolutional feature extractor of a pretrained VGG16.
        self.pretrained_model = models.vgg16(pretrained=True).features
        # Switch to eval mode so BatchNorm/Dropout use their trained
        # statistics instead of batch statistics.
        self.pretrained_model.eval()

    def preprocess_image(self, cv2im):
        '''
        Convert an OpenCV image into the tensor format PyTorch expects.
        INPUT --> image loaded with cv2 (H, W, C), BGR, uint8
        OUTPUT --> Variable of shape (1, 3, 224, 224), values in [0, 1]
        '''
        cv2im = cv2.resize(cv2im, (224, 224))
        im_as_arr = np.float32(cv2im)
        # Make the array contiguous in memory for faster downstream ops.
        im_as_arr = np.ascontiguousarray(im_as_arr)
        # Reorder channels from (H, W, C) to (C, H, W).
        im_as_arr = im_as_arr.transpose(2, 0, 1)
        # Normalize each channel to [0, 1].
        for channel, _ in enumerate(im_as_arr):
            im_as_arr[channel] /= 255
        # Convert to a float tensor.
        im_as_ten = torch.from_numpy(im_as_arr).float()
        # Add a batch dimension -> shape (1, 3, 224, 224).
        im_as_ten.unsqueeze_(0)
        # Wrap in a Variable so gradients can flow back to the input.
        # NOTE(review): Variable is deprecated since PyTorch 0.4; a plain
        # tensor with requires_grad=True behaves identically.
        im_as_var = Variable(im_as_ten, requires_grad=True)
        return im_as_var

    def get_layer(self):
        '''
        Run the image through the network and return the activations of
        the selected layer.
        '''
        img = cv2.imread(self.img_path)
        # input = Variable(torch.randn(1, 3, 224, 224))
        input = self.preprocess_image(img)
        # print(input.shape)
        x = input
        for index, layer in enumerate(self.pretrained_model):
            x = layer(x)
            if (index == self.selected_layer):
                return x

    def get_single_activation(self):
        '''
        Return the feature map of a single channel as a numpy array.
        '''
        activations = self.get_layer()
        # print(activations.shape)
        # Keep only channel 0 of the feature maps.
        activation = activations[:, 0, :, :]
        # print(activation.shape)
        activation = activation.view(activation.shape[1], activation.shape[2])
        # print(activation.shape)
        activation = activation.data.numpy()
        return activation

    def get_all_activation(self):
        '''
        Tile the feature maps of all channels into one 2-D grid image.
        '''
        # Activations of the selected layer, shape (1, C, H, W).
        activations = self.get_layer()
        # Number of channels.
        n_activations = activations.shape[1]
        # Channels displayed per row.
        images_per_row = 16
        # Spatial size of one feature map (assumes square maps).
        size = activations.shape[-1]
        # Number of rows needed.
        # NOTE(review): integer division drops the trailing channels when
        # n_activations is not a multiple of images_per_row.
        n_cols = n_activations // images_per_row
        # Flatten the channels onto one large grid.
        display_grid = np.zeros((n_cols * size, images_per_row * size))
        for col in range(n_cols):
            for row in range(images_per_row):
                # Pick out one channel's feature map.
                channel_image = activations[:, (col * images_per_row + row), :, :]
                channel_image = channel_image.view(channel_image.shape[1], channel_image.shape[2])
                channel_image = channel_image.data.numpy()
                display_grid[col * size : (col + 1) * size, row * size : (row + 1) * size] = channel_image
        return display_grid

    def save_activation_to_img(self):
        '''
        Render the feature maps as a color image and save it to disk.
        '''
        # Single feature map
        # layer_arr = self.get_single_activation()
        # All feature maps tiled together
        layer_arr = self.get_all_activation()

        # Squash activations into [0, 1] with the sigmoid function.
        layer_arr = 1.0 / (1 + np.exp(-1 * layer_arr))
        # Rescale to pixel values in [0, 255].
        layer_arr = np.round(layer_arr * 255)

        # Apply a color map for easier visual inspection.
        layer_arr = layer_arr.astype(np.uint8)
        layer_arr = cv2.applyColorMap(layer_arr, cv2.COLORMAP_JET)

        cv2.imwrite('./LayerVisualization.jpg', layer_arr)
        print('特征图已保存~')

if __name__ == '__main__':
    # Visualize the activations of layer index 3 for the sample image.
    mymodel = LayerVisualization('./123.jpg', 3)
    # print(mymodel.pretrained_model)
    mymodel.save_activation_to_img()

github上用pytorch对CNN网络进行可视化的代码

可视化类激活图(显著图)

类激活图(CAM, class activation map)可视化,是指对输入图像生成类激活热力图。类激活热力图是与特定输出类别相关的二维分数网格,对输入图像的每个位置都要进行计算,它表示每个位置对该类别的重要程度。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#-*- coding:utf-8 -*-
# Class-activation (saliency) visualization with flashtorch:
# load an image, run guided backprop through a pretrained AlexNet,
# and display the resulting heatmap for a chosen ImageNet class.
import torch
from torchvision import models
from flashtorch.utils import load_image, apply_transforms, format_for_plotting, denormalize, ImageNetIndex
from flashtorch.saliency import Backprop
from matplotlib import pyplot as plt
import warnings;warnings.filterwarnings("ignore")

#========================================================
# Load the input image
#========================================================

image = load_image('123321.jpg')
# apply_transforms resizes/normalizes the image for the model input.
input_ = apply_transforms(image)

# plt.imshow(format_for_plotting(denormalize(image)))
# plt.show()

#========================================================
# Load the model
#========================================================

model = models.alexnet(pretrained=True)
# Backprop computes input-gradient saliency maps for the model.
backprop = Backprop(model)

#############################################
# Visualize the class activation heatmap
#############################################

# Look up the ImageNet class index for the target label.
imagenet = ImageNetIndex()
target_class = imagenet['tiger cat']
print(target_class)

# guided=True uses guided backpropagation for a cleaner saliency map.
backprop.visualize(input_, target_class, guided=True)
plt.show()
0%