caffe-classify

环境

caffe, Ubuntu16.04, python2.7

编译caffe

这里需要注意一点:训练数据由自定义的python层直接读取图片,不用官方推荐的lmdb数据格式,这样更容易定位问题。因为用到了python层,编译caffe时需要在Makefile.config中打开 WITH_PYTHON_LAYER := 1,并编译pycaffe(make pycaffe)。

定义类名为 classifyInput 的python类文件 classify-input.py:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import caffe
import numpy as np
import yaml
import os
import cPickle
import cv2

class classifyInput(caffe.Layer):
    """Caffe Python data layer that reads images with OpenCV and emits batches.

    The layer takes no bottoms and fills two tops: ``data`` with shape
    (N, 3, size_h, size_w) in float32 and ``label`` with shape (N,).
    All settings come from the plain-text ``key:value`` file whose path is
    given as ``cfg_path`` in the layer's ``param_str``.
    """

    # Per-channel means subtracted from the image as loaded by cv2.imread
    # (i.e. before the channel order is reversed in forward()).
    _PIXEL_MEANS = np.array([[[103.52, 116.28, 123.675]]])

    def load_data_annotations(self, index):
        """Turn one list-file line ``"<label> <image_path>"`` into a record.

        Returns a dict with keys ``label`` (string as read), ``image`` (path)
        and ``flipped`` (always False here; flipped copies are added in setup).
        """
        # Split on the first space only so that paths containing spaces work.
        label, img_path = index.split(' ', 1)
        return {'label': label, 'image': img_path, 'flipped': False}

    def get_rand_idx(self):
        """Draw a new random visiting order for the training set and rewind."""
        # A plain permutation works for any sample count; reshaping the index
        # array into rows of two would raise ValueError on an odd count.
        self._perm = np.random.permutation(len(self._train_roidb))
        self._cur = 0

    @staticmethod
    def _read_index_file(path):
        """Return the stripped, non-empty lines of a list file."""
        with open(path) as fp:
            stripped = (line.strip() for line in fp)
            return [line for line in stripped if line]

    def setup(self, bottom, top):
        """Read the config and list files, build the sample tables, shape tops."""
        # NOTE(review): pycaffe exposes the parameter string as ``param_str``
        # in newer builds and ``param_str_`` in older forks - accept either.
        param_str = getattr(self, 'param_str', None)
        if param_str is None:
            param_str = self.param_str_
        # safe_load is enough for a flat mapping and never builds arbitrary
        # Python objects from the prototxt string.
        layer_params = yaml.safe_load(param_str)
        self._cfg_path = layer_params['cfg_path']

        cfg_dict = {}
        with open(self._cfg_path) as fp:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                # Split once: the value itself may contain ':' (e.g. a path).
                key, value = line.split(':', 1)
                cfg_dict[key.strip()] = value.strip()
        self._batch_size = int(cfg_dict['batch_size'])
        self._num_classes = int(cfg_dict['num_classes'])
        self._train_path = cfg_dict['train_path']
        self._test_path = cfg_dict['test_path']
        self._size_w = int(cfg_dict['size_w'])
        self._size_h = int(cfg_dict['size_h'])
        self._flip = int(cfg_dict['flip'])
        self._mean_value = int(cfg_dict['mean_value'])

        self._train_image_index = self._read_index_file(self._train_path)
        self._test_image_index = self._read_index_file(self._test_path)

        self._test_roidb = [self.load_data_annotations(index)
                            for index in self._test_image_index]
        self._train_roidb = [self.load_data_annotations(index)
                             for index in self._train_image_index]
        if self._flip == 1:
            # Augment the training set with one mirrored copy of every sample.
            flipped = [{'label': rec['label'], 'image': rec['image'], 'flipped': True}
                       for rec in self._train_roidb]
            self._train_roidb.extend(flipped)
            self._train_image_index = self._train_image_index * 2

        self._name_to_top_map = {'data': 0, 'label': 1}
        # Placeholder shapes; forward() reshapes the tops to the real batch.
        top[0].reshape(1, 3, self._size_h, self._size_w)
        top[1].reshape(1,)
        self.get_rand_idx()
        self._test_cur = 0

    def forward(self, bottom, top):
        """Load the next mini-batch and copy it into the top blobs."""
        if self.phase == caffe.TRAIN:
            # Reshuffle when the remaining samples cannot fill a whole batch.
            if self._cur + self._batch_size > len(self._train_roidb):
                self.get_rand_idx()
            db_inds = self._perm[self._cur:self._cur + self._batch_size]
            self._cur += self._batch_size
            minibatch_db = [self._train_roidb[i] for i in db_inds]
        elif self.phase == caffe.TEST:
            test_image_num = len(self._test_roidb)
            if test_image_num == 0:
                raise ValueError("test list is empty: %s" % self._test_path)
            # Walk the test list cyclically so every batch is full, even when
            # the batch is larger than the list.
            minibatch_db = [self._test_roidb[(self._test_cur + k) % test_image_num]
                            for k in range(self._batch_size)]
            self._test_cur = (self._test_cur + self._batch_size) % test_image_num

        batch_len = len(minibatch_db)
        im_blob = np.zeros((batch_len, 3, self._size_h, self._size_w), dtype=np.float32)
        im_labels = np.zeros((batch_len,), dtype=np.float32)
        for i, sample in enumerate(minibatch_db):
            im = cv2.imread(sample['image'])
            if im is None:
                # cv2.imread returns None instead of raising; fail with the path.
                raise IOError("cannot read image: %s" % sample['image'])
            im = cv2.resize(im, (self._size_w, self._size_h),
                            interpolation=cv2.INTER_LINEAR)
            if sample['flipped']:
                im = im[:, ::-1, :]  # horizontal mirror
            im = im.astype(np.float32, copy=False)
            if self._mean_value == 1:
                im -= self._PIXEL_MEANS
            else:
                im = im / 255.0
            # Reverse the channel axis, then HWC -> CHW.
            im_blob[i, :] = im[:, :, ::-1].transpose([2, 0, 1])
            im_labels[i] = float(sample['label'])

        blobs = {'data': im_blob, 'label': im_labels}
        for blob_name, blob in blobs.items():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*blob.shape)
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

继承caffe.Layer,重写setup()方法和forward()方法(backward()和reshape()为空实现),定义完后将文件放在 path/caffe-master/python 文件夹下

添加环境变量

1
export LD_LIBRARY_PATH=/usr/local/cuda-8.0/lib64:/usr/local/lib:/xxx/software/anaconda2/lib/

制作数据

将数据标注成pascal voc的格式,再使用如下脚本随机切取目标

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import os
import cv2
import numpy.random as npr
import numpy as np
import xml.dom.minidom

def IoU(box, boxes):
    """Compute the overlap ratio between one box and a batch of boxes.

    ``box`` is indexed as (center_x, center_y, width, height). ``boxes`` is a
    2-D array whose columns 1..4 hold the same four quantities; column 0 is
    not read (presumably a class id - confirm with the caller).

    Returns an array with one intersection-over-union value per row of
    ``boxes``.
    """
    cx, cy, bw, bh = box[0], box[1], box[2], box[3]
    other_cx, other_cy = boxes[:, 1], boxes[:, 2]
    other_w, other_h = boxes[:, 3], boxes[:, 4]

    # Edges of the intersection rectangle, derived from centre +/- half size.
    left = np.maximum(cx - bw / 2, other_cx - other_w / 2)
    top = np.maximum(cy - bh / 2, other_cy - other_h / 2)
    right = np.minimum(cx + bw / 2, other_cx + other_w / 2)
    bottom = np.minimum(cy + bh / 2, other_cy + other_h / 2)

    # Clamp at zero so disjoint boxes contribute no overlap.
    overlap = np.maximum(0, right - left) * np.maximum(0, bottom - top)

    union = bw * bh + other_w * other_h - overlap
    return overlap / union

def iou(box1, box2):
    """Return the intersection-over-union of two corner-format boxes.

    Args:
        box1: sequence indexed as [x_min, y_min, x_max, y_max].
        box2: sequence with the same layout.

    Returns:
        The overlap area divided by the union area, or 0 when the boxes do
        not overlap or their union has no area.
    """
    s1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    s2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    x_left = max(box1[0], box2[0])
    y_left = max(box1[1], box2[1])
    x_right = min(box1[2], box2[2])
    y_right = min(box1[3], box2[3])
    if x_left > x_right or y_left > y_right:
        return 0

    s_delta = (x_right - x_left) * (y_right - y_left)
    union = s1 + s2 - s_delta
    if union <= 0:
        # Two degenerate (zero-area) boxes: avoid dividing by zero.
        return 0
    # float() keeps the ratio exact for integer inputs on Python 2 as well.
    return s_delta / float(union)


# Crop positive and negative classification samples out of Pascal VOC
# annotated images. For every labelled box the script writes up to three
# negative crops and up to three positive crops; every 10th file of a
# directory goes to the validation folders, the rest to the training folders.

filepath = "F:/data/classify/晚上数据/image/"
green_light_path_train = "F:/data/VOCdevkit/light_9_3/train/green/"
red_light_path_train = "F:/data/VOCdevkit/light_9_3/train/red/"
yellow_light_path_train = "F:/data/VOCdevkit/light_9_3/train/yellow/"
black_light_path_train = "F:/data/VOCdevkit/light_9_3/train/black/"
negative_path_train = "F:/data/VOCdevkit/light_9_3/train/neg/"

green_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/green/"
red_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/red/"
yellow_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/yellow/"
black_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/black/"
negative_path_valid = "F:/data/VOCdevkit/light_9_3/valid/neg/"

# NOTE(review): the original carried the comment "label:4(open) label:5(close)",
# which does not match the light classes handled below - confirm and remove.

# NOTE(review): min_w / min_h are defined but never read by this script.
min_w = 20
min_h = 20
n_idx = 0
p_idx = 0
img_num = 0

# Annotation class name -> (training folder, validation folder).
_POSITIVE_DIRS = {
    "red_light": (red_light_path_train, red_light_path_valid),
    "yellow_light": (yellow_light_path_train, yellow_light_path_valid),
    "green_light": (green_light_path_train, green_light_path_valid),
    "black_light": (black_light_path_train, black_light_path_valid),
}
_SAMPLES_PER_BOX = 3
# Cap on random draws per requested crop so an unsatisfiable box cannot hang.
_MAX_ATTEMPTS = 200
_NEG_MAX_IOU = 0.3
_POS_MIN_IOU = 0.65


def _rand_between(low, high):
    """Return a random int in [low, high), or ``low`` if that range is empty."""
    low, high = int(low), int(high)
    return npr.randint(low, high) if high > low else low


def _read_boxes(annopath):
    """Parse a VOC XML file into a list of [xmin, ymin, xmax, ymax, name]."""
    annotation = xml.dom.minidom.parse(annopath).documentElement
    boxes = []
    for obj in annotation.getElementsByTagName("object"):
        name_data = obj.getElementsByTagName("name")[0].childNodes[0].data
        bndbox = obj.getElementsByTagName("bndbox")[0]
        # int(float(...)) also accepts coordinates written as "12.0".
        coords = [int(float(bndbox.getElementsByTagName(tag)[0].childNodes[0].data))
                  for tag in ("xmin", "ymin", "xmax", "ymax")]
        boxes.append(coords + [name_data])
    return boxes


def _random_size(crop_w, crop_h):
    """Draw a crop size between 0.8x and 1.25x of the box, at least 1 px."""
    new_w = _rand_between(max(1, int(crop_w * 0.8)), np.ceil(1.25 * crop_w))
    new_h = _rand_between(max(1, int(crop_h * 0.8)), np.ceil(1.25 * crop_h))
    return new_w, new_h


def _sample_negative(img_w, img_h, crop_w, crop_h, gt_boxes):
    """Return (x, y, w, h) of a crop overlapping no labelled box, or None."""
    for _ in range(_MAX_ATTEMPTS):
        neg_w, neg_h = _random_size(crop_w, crop_h)
        # Clamp into the image; a negative origin would wrap the slice around.
        neg_x = max(0, min(img_w - neg_w - 1, npr.randint(0, img_w)))
        neg_y = max(0, min(img_h - neg_h - 1, npr.randint(0, img_h)))
        candidate = [float(neg_x), float(neg_y),
                     float(neg_x + neg_w), float(neg_y + neg_h)]
        # Compare against every labelled box so a negative never contains
        # another annotated object.
        if max(iou(candidate, gt) for gt in gt_boxes) < _NEG_MAX_IOU:
            return neg_x, neg_y, neg_w, neg_h
    return None


def _sample_positive(img_w, img_h, box, crop_w, crop_h):
    """Return (x, y, w, h) of a jittered crop around ``box``, or None."""
    x_left, y_top = box[0], box[1]
    for _ in range(_MAX_ATTEMPTS):
        pos_w, pos_h = _random_size(crop_w, crop_h)
        delta_x = _rand_between(-crop_w * 0.2, crop_w * 0.2)
        delta_y = _rand_between(-crop_h * 0.2, crop_h * 0.2)
        pos_x = max(x_left + crop_w / 2.0 + delta_x - pos_w / 2.0, 0)
        pos_y = max(y_top + crop_h / 2.0 + delta_y - pos_h / 2.0, 0)
        if pos_x + pos_w > img_w or pos_y + pos_h > img_h:
            continue
        candidate = [pos_x, pos_y, pos_x + pos_w, pos_y + pos_h]
        if iou(candidate, box) >= _POS_MIN_IOU:
            return pos_x, pos_y, pos_w, pos_h
    return None


def _save_crop(img, rect, out_dir, index):
    """Write the ``rect`` = (x, y, w, h) region of ``img`` to out_dir/<index>.jpg."""
    if not os.path.isdir(out_dir):
        # cv2.imwrite only returns False when the folder is missing.
        os.makedirs(out_dir)
    x, y, rect_w, rect_h = rect
    cropped_im = img[int(y):int(y + rect_h), int(x):int(x + rect_w), :]
    cv2.imwrite(os.path.join(out_dir, "%s.jpg" % index), cropped_im)


for root, dirs, files in os.walk(filepath):
    valid_count = 0
    for fname in files:
        valid_count += 1
        if os.path.splitext(fname)[1].lower() != ".jpg":
            continue
        imgpath = os.path.join(root, fname)
        # Annotation path: 'image' -> 'label' in the path, extension -> .xml.
        # splitext handles '.JPG' and leaves other 'jpg' substrings alone.
        annopath = os.path.splitext(imgpath.replace('image', 'label'))[0] + '.xml'
        if not os.path.isfile(annopath):
            print("skip %s: annotation %s not found" % (imgpath, annopath))
            continue

        door_list = _read_boxes(annopath)
        img_num += 1
        if not door_list:
            continue

        # np.fromfile + imdecode copes with non-ASCII paths on Windows;
        # IMREAD_COLOR guarantees the 3-axis layout the slicing relies on.
        img = cv2.imdecode(np.fromfile(imgpath, dtype=np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            print("skip %s: cannot decode image" % imgpath)
            continue
        w, h = img.shape[1], img.shape[0]
        to_valid = valid_count % 10 == 0
        # Float copies keep iou() in true division on Python 2 as well.
        gt_boxes = [list(map(float, entry[0:-1])) for entry in door_list]

        for entry, label_box in zip(door_list, gt_boxes):
            crop_w = entry[2] - entry[0] + 1
            crop_h = entry[3] - entry[1] + 1
            if crop_w <= 0 or crop_h <= 0:
                continue  # malformed annotation (max < min)

            neg_dir = negative_path_valid if to_valid else negative_path_train
            for _ in range(_SAMPLES_PER_BOX):
                rect = _sample_negative(w, h, crop_w, crop_h, gt_boxes)
                if rect is None:
                    break
                _save_crop(img, rect, neg_dir, n_idx)
                n_idx += 1

            label_dirs = _POSITIVE_DIRS.get(entry[-1])
            if label_dirs is None:
                # Unknown class name: there is no folder to write it to.
                continue
            pos_dir = label_dirs[1] if to_valid else label_dirs[0]
            for _ in range(_SAMPLES_PER_BOX):
                rect = _sample_positive(w, h, label_box, crop_w, crop_h)
                if rect is None:
                    break
                _save_crop(img, rect, pos_dir, p_idx)
                p_idx += 1

print ("%s images done, pos: %s neg: %s"%(img_num, p_idx, n_idx))

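对每个标注目标随机裁剪3个正样本和3个负样本:裁剪框的宽高取目标宽高的0.8~1.25倍;正样本的中心在目标中心附近随机偏移(不超过目标宽高的20%),且与目标的IoU不小于0.65;负样本在整张图上随机取框,且与标注目标的IoU小于0.3。每个目录中每10个文件取1个划入验证集(valid),其余为训练集(train)。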

cfg文件

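定义配置文件(每行格式为 key:value),描述数据路径、训练的batch大小、类别数量、图片resize后的宽高。其中train_path和test_path指向的列表文件每行为“标签 图片路径”(用空格分隔);flip为1时对训练集做水平翻转增广;mean_value为1时减去像素均值,否则将像素值除以255。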

1
2
3
4
5
6
7
8
batch_size:256
num_classes:5
train_path:/home/xxx/data/light_9_3/light_9_3_train.txt
test_path:/home/xxx/data/light_9_3/light_9_3_valid.txt
size_w:96
size_h:96
flip:0
mean_value:0

solver文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
net: "light-classify-train.prototxt"
test_iter: 13
test_interval: 20
#test_initialization: false
display: 20
average_loss: 20
lr_policy: "multifixed"
#stepsize: 1000
#gamma: 0.1
#base_lr: 0.0001
stagelr: 0.0001
stagelr: 0.001
stagelr: 0.0001
stagelr: 0.00001
stageiter: 500
stageiter: 1500
stageiter: 3000
stageiter: 4500
max_iter: 5000
iter_size: 2
momentum: 0.9
weight_decay: 0.0005
snapshot: 1000
snapshot_prefix: "../output/light_9_3/door-classifyxx"
solver_mode: GPU

模型文件

自定义的数据读取层如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
name: "YOLOV3-TINY"
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'label'
include {
phase:TRAIN
}
python_param {
module: 'classify-input'
layer: 'classifyInput'
param_str: "'cfg_path': ./cfg.txt"
}
}

layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'label'
include {
phase:TEST
}
python_param {
module: 'classify-input'
layer: 'classifyInput'
param_str: "'cfg_path': ./cfg.txt"
}
}

除了上面的数据读取层和最后一层loss,剩下的网络结构和推理用的模型文件一样

训练

1
2
cd path/light_classify/cfg
sh train-door-classify.sh

测试

python测试:

1
2
cd path/light_classify
python test-classify-light.py

TensorRT

使用docker,运行TensorRT-caffe项目