Implementing an Object Detection Project

I. Understanding the Dataset

We will use this object detection dataset from Kaggle.com. It contains 373 images across 3 classes (cucumber, eggplant, and mushroom), each annotated with object bounding boxes.
Our goal is to parse and normalize the images, and to parse the XML annotation files to obtain the coordinates of the two corners (top-left and bottom-right) of each object's bounding box.
If you want to build your own annotated dataset, that works too: with labelImg you can quickly draw bounding boxes around objects and save the annotations in PASCAL VOC format.

<annotation>
    <folder>VOC2007</folder>
    <filename>2007_000392.jpg</filename>  <!-- file name -->
    <source>                              <!-- image source (not important) -->
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
    </source>
    <size>                                <!-- image size (width, height, channels) -->
        <width>500</width>
        <height>332</height>
        <depth>3</depth>
    </size>
    <segmented>1</segmented>              <!-- used for segmentation or not (irrelevant for detection) -->
    <object>                              <!-- an annotated object -->
        <name>horse</name>                <!-- object class -->
        <pose>Right</pose>                <!-- shooting angle -->
        <truncated>0</truncated>          <!-- truncated or not (0 = complete) -->
        <difficult>0</difficult>          <!-- hard to recognize or not (0 = easy) -->
        <bndbox>                          <!-- bounding box (top-left and bottom-right corners) -->
            <xmin>100</xmin>
            <ymin>96</ymin>
            <xmax>355</xmax>
            <ymax>324</ymax>
        </bndbox>
    </object>
    <object>                              <!-- several objects may be annotated -->
        <name>person</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>198</xmin>
            <ymin>58</ymin>
            <xmax>286</xmax>
            <ymax>197</ymax>
        </bndbox>
    </object>
</annotation>
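
As a preview of the parsing we do later, here is a minimal sketch that reads such a file with xmltodict, the parser used in the rest of this article (the file name is taken from the example above and is purely illustrative):

import xmltodict

with open('2007_000392.xml', 'rb') as f:  # hypothetical path to one annotation file
    ann = xmltodict.parse(f)['annotation']

objects = ann['object']
if not isinstance(objects, list):  # xmltodict yields a dict for one <object>, a list for several
    objects = [objects]

for obj in objects:
    box = obj['bndbox']
    print(obj['name'], int(box['xmin']), int(box['ymin']), int(box['xmax']), int(box['ymax']))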

II. Data Preparation

1. Generating the Training and Test Sets

First, we unpack the downloaded dataset and split it into a training set and a test set, each stored in its own folder.

import os
import shutil
import numpy as np

def image_preparation(original_dir, base_dir, labels):
    '''
    File preparation: copy images and annotations into the
    training/test directories.
    INPUT -> original dataset dir, target base dir, list of class labels
    '''
    # Build the directory layout
    if not os.path.exists(base_dir):
        os.mkdir(base_dir)
    train_dir = os.path.join(base_dir, 'traindata')
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)
    test_dir = os.path.join(base_dir, 'testdata')
    if not os.path.exists(test_dir):
        os.mkdir(test_dir)

    # Copy each class's files: first half to train, second half to test
    for label in labels:
        train_label_dir = os.path.join(train_dir, str(label))
        if not os.path.exists(train_label_dir):
            os.mkdir(train_label_dir)
        test_label_dir = os.path.join(test_dir, str(label))
        if not os.path.exists(test_label_dir):
            os.mkdir(test_label_dir)

        # Unique file stems of this class (each stem has a .jpg and a .xml)
        fnames = sorted({os.path.splitext(i)[0] for i in os.listdir(original_dir)
                         if i.split('_')[0] == str(label)})
        split = int(len(fnames) * 0.5)
        for fname in fnames[:split]:
            shutil.copyfile(os.path.join(original_dir, '%s.jpg' % fname),
                            os.path.join(train_label_dir, '%s.jpg' % fname))
            shutil.copyfile(os.path.join(original_dir, '%s.xml' % fname),
                            os.path.join(train_label_dir, '%s.xml' % fname))
        for fname in fnames[split:]:
            shutil.copyfile(os.path.join(original_dir, '%s.jpg' % fname),
                            os.path.join(test_label_dir, '%s.jpg' % fname))
            shutil.copyfile(os.path.join(original_dir, '%s.xml' % fname),
                            os.path.join(test_label_dir, '%s.xml' % fname))
        print('total train ' + str(label) + ' images:',
              len([f for f in os.listdir(train_label_dir) if f.endswith('.jpg')]))
        print('total test ' + str(label) + ' images:',
              len([f for f in os.listdir(test_label_dir) if f.endswith('.jpg')]))

original_dataset_dir = 'D:/download/original_data'
base_dir = 'Vegetables/'
# Copy the images into their per-class folders
image_preparation(original_dataset_dir, base_dir, ['cucumber', 'eggplant', 'mushroom'])

# Number of classes = number of per-class folders in traindata
n_classes = len(os.listdir(os.path.join(base_dir, 'traindata')))
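
As a quick check (a sketch, to be run after the split above), we can confirm that every copied image kept its annotation:

for split in ('traindata', 'testdata'):
    for label in ['cucumber', 'eggplant', 'mushroom']:
        d = os.path.join(base_dir, split, label)
        jpgs = [f for f in os.listdir(d) if f.endswith('.jpg')]
        xmls = [f for f in os.listdir(d) if f.endswith('.xml')]
        assert len(jpgs) == len(xmls), d  # every image needs its .xml twin
        print(split, label, len(jpgs))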

2. Data Processing

Next, we process the images and their XML annotations.

import os
import numpy as np
import pandas as pd
from PIL import Image
import xmltodict
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset

#========================================================
# Configuration
#========================================================

base_dir = 'Vegetables/'
input_dim = 224
BATCH_SIZE = 16

#========================================================
# Custom dataset class
#========================================================

class LocationDataSet(Dataset):
    def __init__(self, root_dir, transform=None, input_dim=224):
        cates = ['cucumber', 'eggplant', 'mushroom']
        class_binary_label = pd.get_dummies(cates).values
        self.transform = transform

        self.imgs = []
        self.bboxes = []
        self.classes = []

        for cate in cates:
            cate_dir = os.path.join(root_dir, str(cate))
            # Unique file stems (each has a .jpg and a .xml alongside it)
            fnames = sorted({os.path.splitext(i)[0] for i in os.listdir(cate_dir)
                             if i.split('_')[0] == str(cate)})
            for fname in fnames:
                img_path = os.path.join(cate_dir, '%s.jpg' % fname)
                img = Image.open(img_path).convert('RGB')  # PIL image, as expected by the transforms below

                xml_path = os.path.join(cate_dir, '%s.xml' % fname)
                with open(xml_path, 'rb') as f:
                    x = xmltodict.parse(f)
                obj = x['annotation']['object']
                if isinstance(obj, list):  # xmltodict returns a list when several objects are annotated
                    obj = obj[0]           # this dataset has one object per image
                bndbox = obj['bndbox']
                bndbox = np.array([float(bndbox['xmin']), float(bndbox['ymin']),
                                   float(bndbox['xmax']), float(bndbox['ymax'])])
                # Normalize the coordinates relative to the 224x224 network input
                bndbox = bndbox / input_dim

                self.imgs.append(img)
                self.bboxes.append(np.hstack((bndbox, class_binary_label[cates.index(cate)])))
                self.classes.append(cate)

    def __getitem__(self, idx):
        img = self.imgs[idx]
        if self.transform:
            sample = self.transform(img)
        else:
            sample = img
        return sample, torch.Tensor(self.bboxes[idx]).float()

    def __len__(self):
        return len(self.imgs)

#========================================================
# Data pipeline
#========================================================

# Preprocessing: PIL images in, normalized tensors out
transform = transforms.Compose([
    transforms.Resize(256),      # scale so the shorter side is 256, keeping aspect ratio
    transforms.CenterCrop(224),  # crop a 224x224 patch from the center
    transforms.ToTensor(),       # PIL Image -> tensor, scaled to [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # standardize to [-1, 1]
])

# Training dataset
trainset = LocationDataSet(os.path.join(base_dir, 'traindata'), transform=transform, input_dim=input_dim)

# Training batch loader
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True
)

# Test dataset
testset = LocationDataSet(os.path.join(base_dir, 'testdata'), transform=transform, input_dim=input_dim)

# Test batch loader
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False
)
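
Before moving on to the model, a quick sanity check on the pipeline (a sketch): pull one batch and confirm that images are 3x224x224 tensors and each target is a 7-dim vector.

images, targets = next(iter(trainloader))
print(images.shape)   # expected: torch.Size([16, 3, 224, 224])
print(targets.shape)  # expected: torch.Size([16, 7]) -- 4 box values + 3 one-hot class entries
print(targets[0])     # first 4 entries: normalized box; last 3: one-hot class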

III. Model and Loss Function

1. Building the Model

We stack several Conv2d layers and flatten their output before feeding it into fully connected layers. To reduce overfitting, we apply Dropout in the fully connected layers and use LeakyReLU activations.

import torch.nn as nn

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5, padding=2, bias=False),
            # output size after conv = (224 + 2*2 - 5)/stride + 1 = 224
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            # output size after pooling = (224 - 2)/stride + 1 = 112
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, padding=2, bias=False),
            # output size after conv = (112 + 2*2 - 5)/stride + 1 = 112
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            # output size after pooling = (112 - 2)/stride + 1 = 56
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(16, 120, kernel_size=5, padding=2, bias=False),
            # output size after conv = (56 + 2*2 - 5)/stride + 1 = 56
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            # output size after pooling = (56 - 2)/stride + 1 = 28
        )

        self.classifier = nn.Sequential(
            nn.Linear(120*28*28, 120),
            nn.LeakyReLU(),
            nn.Dropout(),
            nn.Linear(120, 84),
            nn.LeakyReLU(),
            nn.Dropout(),
            nn.Linear(84, 7)  # 4 box coordinates + 3 class scores
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)

        x = x.view(x.size(0), 120*28*28)

        out = self.classifier(x)
        return out
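
Before wiring in a loss, it is worth confirming the shapes line up. A minimal sketch (the dummy batch is purely illustrative):

import torch

net = Net()
dummy = torch.randn(2, 3, 224, 224)  # a fake batch of two 224x224 RGB images
print(net(dummy).shape)              # expected: torch.Size([2, 7])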

2. Defining the Loss Function

With the model designed, we define a loss function for it: a sum of a mean-squared-error term and an intersection-over-union (IoU) term.

import torch
import torch.nn as nn

class LocationLoss(nn.Module):
    '''
    Loss function (combines mean squared error with intersection over union)
    '''
    def calculate_iou(self, target_boxes, pred_boxes):
        # top-left and bottom-right corners of the intersection
        x_min = torch.max(target_boxes[:, 0], pred_boxes[:, 0])
        y_min = torch.max(target_boxes[:, 1], pred_boxes[:, 1])
        x_max = torch.min(target_boxes[:, 2], pred_boxes[:, 2])
        y_max = torch.min(target_boxes[:, 3], pred_boxes[:, 3])
        # intersection area, clamped at zero so disjoint boxes contribute nothing
        # (clamp keeps this device-agnostic: no hard-coded .cuda())
        intersection = torch.clamp(x_max - x_min, min=0) * torch.clamp(y_max - y_min, min=0)

        # areas of the two boxes
        boxAArea = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
        boxBArea = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])

        iou = intersection / (boxAArea + boxBArea - intersection)
        return iou

    def forward(self, target_boxes, pred_boxes):
        mseloss = nn.functional.mse_loss(pred_boxes, target_boxes)
        iouloss = torch.mean(1 - self.calculate_iou(target_boxes, pred_boxes))

        return mseloss + iouloss
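
To see what the IoU term rewards, here is a small worked example (a sketch using the class above, run on CPU) with two equal-size boxes in normalized coordinates:

import torch

target = torch.tensor([[0.00, 0.00, 0.50, 0.50]])
pred = torch.tensor([[0.25, 0.25, 0.75, 0.75]])
# Both boxes have area 0.25 and overlap in a 0.25 x 0.25 patch:
# intersection = 0.0625, union = 0.25 + 0.25 - 0.0625 = 0.4375
# IoU = 0.0625 / 0.4375 = 1/7, so this pair contributes 1 - 1/7 ≈ 0.857 to the loss
print(LocationLoss().calculate_iou(target, pred))  # tensor([0.1429])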

The rest of the code differs little from an ordinary image classification project.

IV. Complete Code

import os
import numpy as np
import pandas as pd
import cv2 as cv
from PIL import Image
import xmltodict
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from matplotlib import pyplot as plt

#========================================================
# Configuration
#========================================================

original_dataset_dir = 'D:/download/original_data'
base_dir = 'Vegetables/'

input_dim = 224

LR = 0.003
BATCH_SIZE = 16
EPOCH = 30

save_dir = './model/1.pth'

#========================================================
# Generate the training and test sets
#========================================================

def image_preparation(original_dir, base_dir, labels):
    '''
    File preparation: copy images and annotations into the
    training/test directories.
    INPUT -> original dataset dir, target base dir, list of class labels
    '''
    # Build the directory layout
    if not os.path.exists(base_dir):
        os.mkdir(base_dir)
    train_dir = os.path.join(base_dir, 'traindata')
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)
    test_dir = os.path.join(base_dir, 'testdata')
    if not os.path.exists(test_dir):
        os.mkdir(test_dir)

    # Copy each class's files: first half to train, second half to test
    for label in labels:
        train_label_dir = os.path.join(train_dir, str(label))
        if not os.path.exists(train_label_dir):
            os.mkdir(train_label_dir)
        test_label_dir = os.path.join(test_dir, str(label))
        if not os.path.exists(test_label_dir):
            os.mkdir(test_label_dir)

        # Unique file stems of this class (each stem has a .jpg and a .xml)
        fnames = sorted({os.path.splitext(i)[0] for i in os.listdir(original_dir)
                         if i.split('_')[0] == str(label)})
        split = int(len(fnames) * 0.5)
        for fname in fnames[:split]:
            shutil.copyfile(os.path.join(original_dir, '%s.jpg' % fname),
                            os.path.join(train_label_dir, '%s.jpg' % fname))
            shutil.copyfile(os.path.join(original_dir, '%s.xml' % fname),
                            os.path.join(train_label_dir, '%s.xml' % fname))
        for fname in fnames[split:]:
            shutil.copyfile(os.path.join(original_dir, '%s.jpg' % fname),
                            os.path.join(test_label_dir, '%s.jpg' % fname))
            shutil.copyfile(os.path.join(original_dir, '%s.xml' % fname),
                            os.path.join(test_label_dir, '%s.xml' % fname))
        print('total train ' + str(label) + ' images:',
              len([f for f in os.listdir(train_label_dir) if f.endswith('.jpg')]))
        print('total test ' + str(label) + ' images:',
              len([f for f in os.listdir(test_label_dir) if f.endswith('.jpg')]))

# Copy the images into their per-class folders
image_preparation(original_dataset_dir, base_dir, ['cucumber', 'eggplant', 'mushroom'])

# Number of classes = number of per-class folders in traindata
n_classes = len(os.listdir(os.path.join(base_dir, 'traindata')))

#========================================================
# Custom dataset class
#========================================================

class LocationDataSet(Dataset):
    def __init__(self, root_dir, transform=None, input_dim=224):
        cates = ['cucumber', 'eggplant', 'mushroom']
        class_binary_label = pd.get_dummies(cates).values
        self.transform = transform

        self.imgs = []
        self.bboxes = []
        self.classes = []

        for cate in cates:
            cate_dir = os.path.join(root_dir, str(cate))
            # Unique file stems (each has a .jpg and a .xml alongside it)
            fnames = sorted({os.path.splitext(i)[0] for i in os.listdir(cate_dir)
                             if i.split('_')[0] == str(cate)})
            for fname in fnames:
                img_path = os.path.join(cate_dir, '%s.jpg' % fname)
                img = Image.open(img_path).convert('RGB')  # PIL image, as expected by the transforms below

                xml_path = os.path.join(cate_dir, '%s.xml' % fname)
                with open(xml_path, 'rb') as f:
                    x = xmltodict.parse(f)
                obj = x['annotation']['object']
                if isinstance(obj, list):  # xmltodict returns a list when several objects are annotated
                    obj = obj[0]           # this dataset has one object per image
                bndbox = obj['bndbox']
                bndbox = np.array([float(bndbox['xmin']), float(bndbox['ymin']),
                                   float(bndbox['xmax']), float(bndbox['ymax'])])
                # Normalize the coordinates relative to the 224x224 network input
                bndbox = bndbox / input_dim

                self.imgs.append(img)
                self.bboxes.append(np.hstack((bndbox, class_binary_label[cates.index(cate)])))
                self.classes.append(cate)

    def __getitem__(self, idx):
        img = self.imgs[idx]
        if self.transform:
            sample = self.transform(img)
        else:
            sample = img
        return sample, torch.Tensor(self.bboxes[idx]).float()

    def __len__(self):
        return len(self.imgs)

#========================================================
# Data pipeline
#========================================================

# Preprocessing: PIL images in, normalized tensors out
transform = transforms.Compose([
    transforms.Resize(256),      # scale so the shorter side is 256, keeping aspect ratio
    transforms.CenterCrop(224),  # crop a 224x224 patch from the center
    transforms.ToTensor(),       # PIL Image -> tensor, scaled to [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # standardize to [-1, 1]
])

# Training dataset
trainset = LocationDataSet(os.path.join(base_dir, 'traindata'), transform=transform, input_dim=input_dim)

# Training batch loader
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True
)

# Test dataset
testset = LocationDataSet(os.path.join(base_dir, 'testdata'), transform=transform, input_dim=input_dim)

# Test batch loader
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False
)

#========================================================
# Model architecture
#========================================================

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5, padding=2, bias=False),
            # output size after conv = (224 + 2*2 - 5)/stride + 1 = 224
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            # output size after pooling = (224 - 2)/stride + 1 = 112
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, padding=2, bias=False),
            # output size after conv = (112 + 2*2 - 5)/stride + 1 = 112
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            # output size after pooling = (112 - 2)/stride + 1 = 56
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(16, 120, kernel_size=5, padding=2, bias=False),
            # output size after conv = (56 + 2*2 - 5)/stride + 1 = 56
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
            # output size after pooling = (56 - 2)/stride + 1 = 28
        )

        self.classifier = nn.Sequential(
            nn.Linear(120*28*28, 120),
            nn.LeakyReLU(),
            nn.Dropout(),
            nn.Linear(120, 84),
            nn.LeakyReLU(),
            nn.Dropout(),
            nn.Linear(84, n_classes+4)  # 4 box coordinates + n_classes class scores
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)

        x = x.view(x.size(0), 120*28*28)

        out = self.classifier(x)
        return out

#========================================================
# Loss function
#========================================================

class LocationLoss(nn.Module):
    '''
    Loss function (combines mean squared error with intersection over union)
    '''
    def calculate_iou(self, target_boxes, pred_boxes):
        # top-left and bottom-right corners of the intersection
        x_min = torch.max(target_boxes[:, 0], pred_boxes[:, 0])
        y_min = torch.max(target_boxes[:, 1], pred_boxes[:, 1])
        x_max = torch.min(target_boxes[:, 2], pred_boxes[:, 2])
        y_max = torch.min(target_boxes[:, 3], pred_boxes[:, 3])
        # intersection area, clamped at zero so disjoint boxes contribute nothing
        # (clamp keeps this device-agnostic: no hard-coded .cuda())
        intersection = torch.clamp(x_max - x_min, min=0) * torch.clamp(y_max - y_min, min=0)

        # areas of the two boxes
        boxAArea = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
        boxBArea = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])

        iou = intersection / (boxAArea + boxBArea - intersection)
        return iou

    def forward(self, target_boxes, pred_boxes):
        mseloss = nn.functional.mse_loss(pred_boxes, target_boxes)
        iouloss = torch.mean(1 - self.calculate_iou(target_boxes, pred_boxes))

        return mseloss + iouloss

#========================================================
# Model instantiation
#========================================================

# Use the GPU when available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Net().to(device)

# Loss function and optimizer (SGD with momentum)
criterion = LocationLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9)

#========================================================
# Training
#========================================================

def train_work(mode=None):
    '''
    Training loop
    '''
    net.train()  # switch to training mode
    if mode == 'Update' and os.path.exists(save_dir):
        net.load_state_dict(torch.load(save_dir))  # resume from saved weights
    loss_over_time = []
    for epoch in range(EPOCH):
        running_loss = 0.0
        # iterate over batches
        i = 0
        for data in trainloader:
            i += 1
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(images)
            loss = criterion(labels, outputs)
            loss.backward()
            optimizer.step()

            # print the average loss every 10 batches
            running_loss += loss.item()
            if i % 10 == 9:
                avg_loss = running_loss/10
                loss_over_time.append(avg_loss)
                print('[%d] loss: %.03f' % (epoch + 1, avg_loss))
                running_loss = 0.0

        # evaluate classification accuracy on the training set after each epoch
        net.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for data in trainloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                predicted = torch.argmax(outputs[:, 4:4+n_classes], dim=1)
                truth = torch.argmax(labels[:, 4:4+n_classes], dim=1)
                correct += (predicted == truth).sum().item()
                total += labels.size(0)
            print('epoch %d classification accuracy: %d%%' % (epoch + 1, 100 * correct / total))
        net.train()
    print('Finished Training')
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)  # make sure ./model exists
    torch.save(net.state_dict(), save_dir)
    return loss_over_time

def show_loss(training_loss):
    '''
    Plot the loss curve
    '''
    plt.plot(training_loss)
    plt.xlabel('10\'s of batches')
    plt.ylabel('loss')
    plt.ylim(0, 2.5)  # consistent scale
    plt.show()

def show_predicted():
    '''
    Visualize a prediction
    '''
    img, label = testloader.dataset[0]
    image = img.unsqueeze(0).to(device)
    label = label.unsqueeze(0)

    net.load_state_dict(torch.load(save_dir))
    net.eval()

    with torch.no_grad():
        outputs = net(image).cpu()
    predict_cate = torch.argmax(outputs[:, 4:4+n_classes], dim=1)
    truth_cate = torch.argmax(label[:, 4:4+n_classes], dim=1)
    print(predict_cate, truth_cate)

    img = img * 0.5 + 0.5  # undo the normalization
    pil_transform = transforms.ToPILImage()

    truth_rect = label[:, :4] * input_dim
    predict_rect = outputs[:, :4] * input_dim
    print(truth_rect)
    print(predict_rect)

    origin = np.array(pil_transform(img), dtype=np.uint8)

    # cv.rectangle needs integer pixel coordinates
    x_min, y_min, x_max, y_max = [int(v) for v in truth_rect.squeeze()[:4]]
    cv.rectangle(origin, (x_min, y_min), (x_max, y_max),
                 (0, 255, 0), thickness=2)   # ground truth in green
    x_min, y_min, x_max, y_max = [int(v) for v in predict_rect.squeeze()[:4]]
    cv.rectangle(origin, (x_min, y_min), (x_max, y_max),
                 (255, 0, 0), thickness=2)   # prediction in red (RGB)
    # ToPILImage gives RGB; OpenCV writes BGR, so convert before saving
    cv.imwrite('box_detector.png', cv.cvtColor(origin, cv.COLOR_RGB2BGR))

#========================================================
# Main
#========================================================

if __name__ == "__main__":
    training_loss = train_work('Update')
    show_loss(training_loss)
    show_predicted()
