caffe-classify

环境

caffe, Ubuntu16.04, python2.7

编译caffe

这里需要注意一点：本项目通过自定义的 Python 数据层直接读取图片，而不使用官方推荐的 lmdb 数据格式，这样更容易定位问题。因此编译 caffe 时需要在 Makefile.config 中打开 `WITH_PYTHON_LAYER := 1`，否则无法加载 Python 层。

定义类名为 `classifyInput` 的 Python 层，保存为文件 `classify-input.py`：

import caffe
import numpy as np
import yaml
import os
import cPickle
import cv2

class classifyInput(caffe.Layer):
    """Caffe Python data layer that feeds resized images and their class labels.

    The layer has no bottoms and two tops: ``data`` (N x 3 x size_h x size_w,
    float32) and ``label`` (N,).  Its ``param_str`` is YAML containing a
    ``cfg_path`` key that points to a text file of ``key:value`` lines
    (batch_size, num_classes, train_path, test_path, size_w, size_h, flip,
    mean_value).  ``train_path`` / ``test_path`` are list files whose lines
    have the form ``<label> <image_path>``.
    """

    def load_data_annotations(self, index):
        """Parse one list-file line into a sample record.

        Args:
            index: a line of the form ``<label> <image_path>``.

        Returns:
            dict with keys ``label`` (str), ``image`` (str) and
            ``flipped`` (always False here).

        Raises:
            ValueError: if the line does not contain both fields.
        """
        # Split on the first whitespace run only, so image paths that contain
        # spaces (or lines using tabs / repeated spaces) are still parsed.
        label, img_path = index.split(None, 1)
        return {'label': label, 'image': img_path, 'flipped': False}

    def _read_image_list(self, list_path):
        """Return the non-empty, stripped lines of the list file ``list_path``."""
        with open(list_path) as fp:
            stripped = (line.strip() for line in fp)
            # Blank lines (e.g. a trailing newline) carry no sample.
            return [line for line in stripped if line]

    def get_rand_idx(self):
        """Draw a new random visiting order over the training set and rewind."""
        # A plain permutation works for any sample count; the previous
        # reshape to (-1, 2) pairs raised for an odd number of samples.
        self._perm = np.random.permutation(len(self._train_roidb))
        self._cur = 0

    def setup(self, bottom, top):
        """Read the configuration and sample lists, then size the top blobs.

        Raises:
            KeyError: if ``cfg_path`` or a required cfg key is missing.
            ValueError: if a cfg line has no ``:`` or a list line is malformed.
        """
        # safe_load: the parameter string only needs plain YAML scalars/maps.
        layer_params = yaml.safe_load(self.param_str_)
        self._cfg_path = layer_params['cfg_path']
        cfg_dict = {}
        with open(self._cfg_path) as fp:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                # Split on the first ':' only so values that contain a colon
                # (for example Windows paths) are kept whole.
                key, value = line.split(':', 1)
                cfg_dict[key.strip()] = value.strip()
        self._batch_size = int(cfg_dict['batch_size'])
        self._num_classes = int(cfg_dict['num_classes'])
        self._train_path = cfg_dict['train_path']
        self._test_path = cfg_dict['test_path']
        self._size_w = int(cfg_dict['size_w'])
        self._size_h = int(cfg_dict['size_h'])
        self._flip = int(cfg_dict['flip'])
        self._mean_value = int(cfg_dict['mean_value'])

        self._train_image_index = self._read_image_list(self._train_path)
        self._test_image_index = self._read_image_list(self._test_path)

        self._test_roidb = [self.load_data_annotations(index)
                            for index in self._test_image_index]
        self._train_roidb = [self.load_data_annotations(index)
                             for index in self._train_image_index]
        if self._flip == 1:
            # Append one horizontally-flipped twin per training sample.
            flipped = [{'label': entry['label'],
                        'image': entry['image'],
                        'flipped': True}
                       for entry in self._train_roidb]
            self._train_roidb.extend(flipped)
            # Double the index list exactly once (it used to be doubled on
            # every loop iteration, growing exponentially).
            self._train_image_index = self._train_image_index * 2

        self._name_to_top_map = {'data': 0, 'label': 1}
        # Placeholder shapes; forward() reshapes to the real batch size.
        top[0].reshape(1, 3, self._size_h, self._size_w)
        top[1].reshape(1,)
        self.get_rand_idx()
        self._test_cur = 0

    def forward(self, bottom, top):
        """Load the next mini-batch and copy it into the ``data``/``label`` tops.

        Raises:
            IOError: if an image listed in the mini-batch cannot be read.
        """
        if self.phase == caffe.TRAIN:
            image_num = len(self._train_roidb)
            # Not enough unseen samples left for a full batch: reshuffle.
            if self._cur + self._batch_size > image_num:
                self.get_rand_idx()
            db_inds = self._perm[self._cur:self._cur + self._batch_size]
            self._cur += self._batch_size
            minibatch_db = [self._train_roidb[i] for i in db_inds]

        elif self.phase == caffe.TEST:
            test_image_num = len(self._test_roidb)
            if self._test_cur + self._batch_size > test_image_num:
                # Wrap around: take the tail, then continue from the start.
                overflow = self._batch_size - test_image_num + self._test_cur
                minibatch_db = (self._test_roidb[self._test_cur:] +
                                self._test_roidb[:overflow])
                self._test_cur = overflow
            else:
                minibatch_db = self._test_roidb[
                    self._test_cur:self._test_cur + self._batch_size]
                self._test_cur += self._batch_size

        batch_len = len(minibatch_db)
        im_blob = np.zeros((batch_len, 3, self._size_h, self._size_w),
                           dtype=np.float32)
        im_labels = np.zeros((batch_len,), dtype=np.float32)
        for i, entry in enumerate(minibatch_db):
            im = cv2.imread(entry['image'])
            if im is None:
                # cv2.imread signals failure by returning None; fail with the
                # offending path instead of an opaque cv2.resize error.
                raise IOError("classifyInput: cannot read image '%s'"
                              % entry['image'])
            im = cv2.resize(im, (self._size_w, self._size_h),
                            interpolation=cv2.INTER_LINEAR)
            if entry['flipped']:
                im = im[:, ::-1, :]
            im = im.astype(np.float32, copy=False)
            if self._mean_value == 1:
                # Per-channel means, subtracted in the channel order that
                # cv2.imread returns.
                pixel_means = np.array([[[103.52, 116.28, 123.675]]])
                im -= pixel_means
            else:
                im = im / 255.0
            # Reverse the channel axis, then HWC -> CHW.
            im_blob[i, :] = im[:, :, ::-1].transpose([2, 0, 1])
            im_labels[i] = float(entry['label'])
        blobs = {'data': im_blob, 'label': im_labels}
        for blob_name, blob in blobs.items():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

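该类继承 caffe.Layer，并实现 setup()、reshape()、forward()、backward() 四个方法：主要逻辑在 setup()（读取配置和数据列表）和 forward()（读取图片并组成 batch）中，reshape() 和 backward() 为空实现。定义完后将 classify-input.py 放在

path/caffe-master/python

文件夹下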

添加环境变量

export LD_LIBRARY_PATH=/usr/local/cuda-8.0/lib64:/usr/local/lib:/xxx/software/anaconda2/lib/

制作数据

将数据标注成pascal voc的格式，再使用如下脚本随机切取目标

import os
import cv2
import numpy.random as npr
import numpy as np
import  xml.dom.minidom

def IoU(box, boxes):
    """Intersection-over-union of one box against every row of ``boxes``.

    Args:
        box: indexable as (cx, cy, w, h) -- centre coordinates plus size.
        boxes: 2-D array whose columns 1..4 hold (cx, cy, w, h); column 0
            is not read.

    Returns:
        Array with one overlap ratio per row of ``boxes``.
    """
    # Intersection rectangle, computed edge by edge from centre +/- half size.
    left = np.maximum(box[0] - box[2] / 2, boxes[:, 1] - boxes[:, 3] / 2)
    top = np.maximum(box[1] - box[3] / 2, boxes[:, 2] - boxes[:, 4] / 2)
    right = np.minimum(box[0] + box[2] / 2, boxes[:, 1] + boxes[:, 3] / 2)
    bottom = np.minimum(box[1] + box[3] / 2, boxes[:, 2] + boxes[:, 4] / 2)

    # Clamp at zero so disjoint boxes contribute no overlap.
    overlap = np.maximum(0, right - left) * np.maximum(0, bottom - top)

    single_area = box[2] * box[3]
    row_areas = boxes[:, 3] * boxes[:, 4]
    return overlap / (single_area + row_areas - overlap)

def iou(box1, box2):
    """Intersection-over-union of two corner-format boxes.

    Args:
        box1: indexable as (x_min, y_min, x_max, y_max).
        box2: indexable as (x_min, y_min, x_max, y_max).

    Returns:
        The overlap ratio as a float, or 0 when the boxes do not intersect
        or their union has no area.
    """
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    x_left = max(box1[0], box2[0])
    y_top = max(box1[1], box2[1])
    x_right = min(box1[2], box2[2])
    y_bottom = min(box1[3], box2[3])
    if x_left > x_right or y_top > y_bottom:
        return 0

    inter = (x_right - x_left) * (y_bottom - y_top)
    union = area1 + area2 - inter
    if union <= 0:
        # Both boxes are degenerate (zero area): define the overlap as 0
        # instead of dividing by zero.
        return 0
    # float() forces true division even for integer inputs on Python 2.
    return float(inter) / union


# Root of the source images.  The VOC xml for each image is looked up under
# the same directory path with "image" replaced by "label".
filepath = "F:/data/classify/晚上数据/image/"
green_light_path_train = "F:/data/VOCdevkit/light_9_3/train/green/"
red_light_path_train = "F:/data/VOCdevkit/light_9_3/train/red/"
yellow_light_path_train = "F:/data/VOCdevkit/light_9_3/train/yellow/"
black_light_path_train = "F:/data/VOCdevkit/light_9_3/train/black/"
negative_path_train = "F:/data/VOCdevkit/light_9_3/train/neg/"

green_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/green/"
red_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/red/"
yellow_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/yellow/"
black_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/black/"
negative_path_valid = "F:/data/VOCdevkit/light_9_3/valid/neg/"

# Annotation class name -> (training output dir, validation output dir).
positive_dirs = {
    "red_light": (red_light_path_train, red_light_path_valid),
    "yellow_light": (yellow_light_path_train, yellow_light_path_valid),
    "green_light": (green_light_path_train, green_light_path_valid),
    "black_light": (black_light_path_train, black_light_path_valid),
}

min_w = 20
min_h = 20
n_idx = 0
p_idx = 0
img_num = 0

# Number of positive and of negative crops wanted per annotated object.
samples_per_object = 3
# Upper bound on random draws per object and sample kind, so an object for
# which no acceptable crop exists cannot stall the script.
max_attempts = 200

# cv2.imwrite only returns False when the target directory is missing, so
# make sure every output directory exists up front.
for out_dir in [negative_path_train, negative_path_valid] + \
        [d for pair in positive_dirs.values() for d in pair]:
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

for root, dirs, files in os.walk(filepath):
    valid_count = 0
    for file_name in files:
        valid_count += 1
        stem, postfix = os.path.splitext(file_name)
        if postfix.lower() != ".jpg":
            continue

        # Every 10th file of a directory goes to the validation split.
        is_train = (valid_count % 10 != 0)

        imgpath = os.path.join(root, file_name)
        # Map only the directory part image -> label and swap the extension
        # explicitly; replacing substrings in the whole path also rewrote
        # file names containing "image"/"jpg" and missed upper-case ".JPG".
        annopath = os.path.join(root.replace('image', 'label'), stem + '.xml')
        if not os.path.isfile(annopath):
            print("skip %s: annotation %s not found" % (imgpath, annopath))
            continue

        # Collect [xmin, ymin, xmax, ymax, name] (all strings) per object.
        door_list = []
        dom_tree = xml.dom.minidom.parse(annopath)
        annotation = dom_tree.documentElement
        for obj in annotation.getElementsByTagName("object"):
            name_data = obj.getElementsByTagName("name")[0].childNodes[0].data
            bndbox = obj.getElementsByTagName("bndbox")[0]
            coords = [bndbox.getElementsByTagName(tag)[0].childNodes[0].data
                      for tag in ("xmin", "ymin", "xmax", "ymax")]
            door_list.append(coords + [name_data])
        img_num += 1

        if len(door_list) != 0:
            # np.fromfile + imdecode instead of cv2.imread so that non-ASCII
            # paths can be read.
            img = cv2.imdecode(np.fromfile(imgpath, dtype=np.uint8), -1)
            if img is None:
                print("skip %s: image could not be decoded" % imgpath)
                continue
            w, h = img.shape[1], img.shape[0]
            # All annotated boxes of this image as float corner boxes.
            gt_boxes = [list(map(float, entry[0:-1])) for entry in door_list]

            for entry, label_box in zip(door_list, gt_boxes):
                # int(float(...)) tolerates coordinates written as "12.0".
                x_left, y_top, x_right, y_bottom = [int(v) for v in label_box]
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                if crop_w < 1 or crop_h < 1:
                    # Inverted box: nothing sensible can be cropped.
                    continue

                # Crop sizes are drawn from [0.8, 1.25) times the box size,
                # never smaller than one pixel.
                w_low = max(1, int(crop_w * 0.8))
                w_high = int(np.ceil(1.25 * crop_w))
                h_low = max(1, int(crop_h * 0.8))
                h_high = int(np.ceil(1.25 * crop_h))

                # ---- negative samples ----
                neg_num = 0
                attempts = 0
                while neg_num < samples_per_object and attempts < max_attempts:
                    attempts += 1
                    neg_w = npr.randint(w_low, w_high)
                    neg_h = npr.randint(h_low, h_high)
                    if neg_w >= w or neg_h >= h:
                        # Crop does not fit into the image.
                        continue
                    neg_x = min(w - neg_w - 1, npr.randint(0, w))
                    neg_y = min(h - neg_h - 1, npr.randint(0, h))
                    crop_box = np.array([neg_x, neg_y,
                                         neg_x + neg_w, neg_y + neg_h])
                    # A negative must stay clear of every annotated object,
                    # not only of the one currently being processed.
                    max_overlap = max(iou(crop_box, gt) for gt in gt_boxes)
                    if max_overlap < 0.3:
                        neg_dir = (negative_path_train if is_train
                                   else negative_path_valid)
                        save_file = os.path.join(neg_dir, "%s.jpg" % n_idx)
                        # Two-index slicing also works for grayscale images.
                        cropped_im = img[neg_y: neg_y + neg_h,
                                         neg_x: neg_x + neg_w]
                        # Count the crop only when it was really written.
                        if cv2.imwrite(save_file, cropped_im):
                            n_idx += 1
                            neg_num += 1

                # ---- positive samples ----
                class_dirs = positive_dirs.get(entry[-1])
                if class_dirs is None:
                    # Unknown class name: there is no output directory for it.
                    continue
                pos_dir = class_dirs[0] if is_train else class_dirs[1]

                # Centre shift of up to 20% of the box size; integer bounds,
                # upper one inclusive, so narrow boxes (shift 0) still give a
                # valid random range.
                shift_x = int(crop_w * 0.2)
                shift_y = int(crop_h * 0.2)

                pos_num = 0
                attempts = 0
                while pos_num < samples_per_object and attempts < max_attempts:
                    attempts += 1
                    pos_w = npr.randint(w_low, w_high)
                    pos_h = npr.randint(h_low, h_high)
                    delta_x = npr.randint(-shift_x, shift_x + 1)
                    delta_y = npr.randint(-shift_y, shift_y + 1)
                    pos_x = max(x_left + crop_w / 2.0 + delta_x - pos_w / 2.0, 0)
                    pos_y = max(y_top + crop_h / 2.0 + delta_y - pos_h / 2.0, 0)
                    if pos_x + pos_w > w or pos_y + pos_h > h:
                        continue
                    crop_box = np.array([pos_x, pos_y,
                                         pos_x + pos_w, pos_y + pos_h])
                    if iou(crop_box, label_box) >= 0.65:
                        cropped_im = img[int(pos_y): int(pos_y + pos_h),
                                         int(pos_x): int(pos_x + pos_w)]
                        save_file = os.path.join(pos_dir, "%s.jpg" % p_idx)
                        if cv2.imwrite(save_file, cropped_im):
                            p_idx += 1
                            pos_num += 1

        print ("%s images done, pos: %s neg: %s"%(img_num, p_idx, n_idx))

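每个目标随机裁剪 3 个正样本和 3 个负样本：裁剪框的宽高在标注框的 0.8～1.25 倍之间随机取值；正样本的中心在标注框中心附近随机偏移（不超过宽高的 20%），且与标注框的 IoU ≥ 0.65；负样本在整张图上随机取位置，且与标注框的 IoU < 0.3。每个目录中每 10 个文件有 1 个划入验证集（valid），其余划入训练集（train）。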

cfg文件

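定义配置文件，每行一项，格式为 `key:value`，描述训练/验证列表文件的路径（train_path、test_path）、训练的 batch 大小（batch_size）、类别数量（num_classes）、图片 resize 后的宽高（size_w、size_h），以及是否做水平翻转增强（flip，1 为开启）和预处理方式（mean_value，1 为减去均值，否则除以 255）。列表文件每行的格式为 `标签 图片路径`，以空格分隔。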

batch_size:256
num_classes:5
train_path:/home/xxx/data/light_9_3/light_9_3_train.txt
test_path:/home/xxx/data/light_9_3/light_9_3_valid.txt
size_w:96
size_h:96
flip:0
mean_value:0

solver文件

# Solver configuration for the light classifier.
# Network definition; it contains both the TRAIN- and TEST-phase data layers.
net: "light-classify-train.prototxt"
# Every test_interval training iterations, run test_iter forward passes of
# the TEST-phase net.
test_iter: 13
test_interval: 20
#test_initialization: false
# Log every 20 iterations, reporting the loss averaged over the last 20.
display: 20
average_loss: 20
# NOTE(review): "multifixed" with stagelr/stageiter is not part of upstream
# BVLC Caffe's SolverParameter; it presumably comes from the Caffe fork in
# use -- TODO confirm.  The i-th stagelr is presumably the learning rate used
# until iteration stageiter[i] (0.0001 -> 0.001 -> 0.0001 -> 0.00001).
lr_policy: "multifixed"
#stepsize: 1000
#gamma: 0.1
#base_lr: 0.0001
stagelr: 0.0001
stagelr: 0.001
stagelr: 0.0001
stagelr: 0.00001
stageiter: 500
stageiter: 1500
stageiter: 3000
stageiter: 4500
max_iter: 5000
# Gradients are accumulated over iter_size forward/backward passes per update.
iter_size: 2
momentum: 0.9
weight_decay: 0.0005
# Write a snapshot every 1000 iterations.
# NOTE(review): the "door-classifyxx" prefix looks left over from another
# project; the output directory is light_9_3.
snapshot: 1000
snapshot_prefix: "../output/light_9_3/door-classifyxx"
solver_mode: GPU

模型文件

自定义的数据读取层如下：

# NOTE(review): the net name says YOLOV3-TINY although this README describes
# a classifier -- presumably left over from a template; confirm.
name: "YOLOV3-TINY"
# Training-phase data source: the Python layer class classifyInput from the
# module classify-input (classify-input.py).  param_str is parsed as YAML in
# the layer's setup() and must provide the key cfg_path.
layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'
  top: 'label'
  include {
    phase:TRAIN
  }
  python_param {
    module: 'classify-input'
    layer: 'classifyInput'
    param_str: "'cfg_path': ./cfg.txt"
  }
}

# Test-phase counterpart: same class and cfg file; the layer chooses between
# the train and test lists from its phase inside forward().
layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'
  top: 'label'
  include {
    phase:TEST
  }
  python_param {
    module: 'classify-input'
    layer: 'classifyInput'
    param_str: "'cfg_path': ./cfg.txt"
  }
}

除了最后一层loss，剩下的和推理的一样

训练

cd path/light_classify/cfg
sh train-door-classify.sh

测试

python测试：

cd path/light_classify
python test-classify-light.py

TensorRT

使用docker，运行TensorRT-caffe项目