# loadData.py

import os
import glob
import cv2
import xml.etree.ElementTree as ET
import numpy as np
import scipy.sparse
import scipy.io as sio
import numpy.random as npr
import time
from config import cfg

# Target length(s) for the image's shorter side; a single entry means one scale.
SCALES = (600,)
# Hard cap on the longer side after rescaling (see prep_im_for_blob).
MAX_SIZE = 1000
# Per-channel pixel mean subtracted before batching; value comes from project config.
PIXEL_MEANS = cfg.PIXEL_MEANS


class DataLayer(object):
    """Serves single-image minibatches from a PASCAL-VOC-style dataset.

    Expects ``rootPath`` to contain ``JPEGImages/`` and ``Annotations/``
    (XML files in PASCAL VOC format). Each call to :meth:`forward` returns
    one image blob with its ground-truth boxes, cycling through the dataset
    indefinitely via a modulo pointer.
    """

    def __init__(self, rootPath):
        """Load every image and annotation under ``rootPath`` into memory.

        Args:
            rootPath: dataset root containing JPEGImages/ and Annotations/.
        """
        self.xmlPath = os.path.join(rootPath, "Annotations")
        self.imgPath = os.path.join(rootPath, "JPEGImages")
        self.point = 0  # rolling pointer of the next sample to serve
        self.classes = (
            '__background__',  # always index 0
            'balloon')
        # Map class name -> integer label index.
        self._class_to_ind = dict(
            list(zip(self.classes, list(range(len(self.classes))))))
        self.images = []
        # NOTE: misspelled attribute name kept for backward compatibility
        # with existing callers.
        self.annotaions = []
        self.filenames = []
        self._load()
        self.length = len(self.images)

    def im_list_to_blob(self, ims):
        """Convert a list of images into a single network-input blob.

        Assumes images are already prepared (means subtracted, channel
        order fixed, ...). Images are zero-padded on the bottom/right up
        to the largest height/width in the list.

        Args:
            ims: list of HxWx3 float images.
        Returns:
            (N, max_h, max_w, 3) float32 array.
        """
        max_shape = np.array([im.shape for im in ims]).max(axis=0)
        num_images = len(ims)
        blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                        dtype=np.float32)
        for i in range(num_images):
            im = ims[i]
            # Top-left aligned copy; the rest stays zero padding.
            blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
        return blob

    def prep_im_for_blob(self, im, pixel_means, target_size, max_size):
        """Mean-subtract and rescale an image for use in a blob.

        The shorter side is scaled to ``target_size`` unless that would push
        the longer side past ``max_size``, in which case the longer side is
        scaled to ``max_size`` instead.

        Args:
            im: HxWx3 image.
            pixel_means: per-channel mean to subtract.
            target_size: desired length of the shorter side.
            max_size: cap on the longer side.
        Returns:
            (resized image as float32, scale factor applied).
        """
        im = im.astype(np.float32, copy=False)
        im -= pixel_means
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than max_size.
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale,
            fy=im_scale,
            interpolation=cv2.INTER_LINEAR)

        return im, im_scale

    def _get_image_blob(self, images, scale_inds):
        """Build an input blob from ``images`` at the given scale indices.

        Args:
            images: list of raw images.
            scale_inds: per-image index into the module-level SCALES tuple.
        Returns:
            (blob, list of applied scale factors).
        """
        num_images = len(images)
        processed_ims = []
        im_scales = []
        for i in range(num_images):
            im = images[i]
            target_size = SCALES[scale_inds[i]]
            im, im_scale = self.prep_im_for_blob(
                im, PIXEL_MEANS, target_size, MAX_SIZE)
            im_scales.append(im_scale)
            processed_ims.append(im)

        # Create a blob to hold the input images.
        blob = self.im_list_to_blob(processed_ims)

        return blob, im_scales

    def get_minibatch(self):
        """Construct a single-image minibatch at the current pointer.

        Returns:
            dict with keys:
              'data'     -- (1, H, W, 3) float32 image blob,
              'gt_boxes' -- (num_gt, 5) float32 [x1, y1, x2, y2, cls],
                            coordinates already multiplied by the image scale,
              'im_info'  -- float32 [height, width, im_scale].
        """
        imageArray = np.asarray([self.images[self.point % self.length]])
        annotationArray = self.annotaions[self.point % self.length]
        num_images = len(imageArray)
        # Sample random scales to use for each image in this batch.
        random_scale_inds = npr.randint(
            0, high=len(SCALES), size=num_images)

        # Get the input image blob, formatted for the network.
        im_blob, im_scales = self._get_image_blob(imageArray,
                                                  random_scale_inds)

        assert len(im_scales) == 1, "Single batch only"
        assert len(im_blob) == 1, "Single batch only"

        blobs = {'data': im_blob}

        # gt boxes: (x1, y1, x2, y2, cls)
        gt_inds = np.where(annotationArray['gt_classes'] != 0)[0]
        gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
        gt_boxes[:, 0:4] = annotationArray['boxes'][gt_inds, :] * im_scales[0]
        gt_boxes[:, 4] = annotationArray['gt_classes'][gt_inds]
        blobs['gt_boxes'] = gt_boxes
        blobs['im_info'] = np.array(
            [im_blob.shape[1], im_blob.shape[2], im_scales[0]],
            dtype=np.float32)  # [height, width, im_scale]

        return blobs

    def forward(self):
        """Return the next minibatch and advance the rolling pointer."""
        blobs = self.get_minibatch()
        self.point += 1
        return blobs

    def _load(self):
        """Read every image under JPEGImages and its paired XML annotation."""
        imageArray = []
        annotationArray = []
        for imgP in glob.glob(os.path.join(self.imgPath, "*.*")):
            img = cv2.imread(imgP)  # BGR order, or None on failure
            if img is None:
                # glob("*.*") can match non-image files (e.g. .DS_Store);
                # skip them instead of crashing in cvtColor below.
                continue
            # NOTE(review): images are converted BGR -> RGB here, while
            # prep_im_for_blob's contract mentions BGR order and
            # cfg.PIXEL_MEANS is conventionally a BGR mean — confirm the
            # two conventions agree.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Locate the paired annotation. Swap only the file extension,
            # not every "jpg" substring in the path (str.replace would also
            # corrupt directory names containing "jpg").
            xmlP = imgP.replace("JPEGImages", "Annotations")
            xmlP = os.path.splitext(xmlP)[0] + ".xml"
            annotation = self._load_pascal_annotation(xmlP)
            # Append image only after the annotation parsed successfully so
            # the three parallel lists can never get out of sync.
            imageArray.append(img)
            annotationArray.append(annotation)
            self.filenames.append(xmlP)
        self.images = imageArray
        self.annotaions = annotationArray

    def _load_pascal_annotation(self, filename):
        """Load bounding-box info from a PASCAL-VOC-format XML file.

        Args:
            filename: path to the XML annotation.
        Returns:
            dict with 'boxes' (N,4 float32, 0-based [x1,y1,x2,y2]),
            'gt_classes' (N int32), 'gt_overlaps' (N x num_classes CSR
            matrix, 1.0 at the gt class), 'flipped' (False) and
            'seg_areas' (N float32 box areas).
        """
        tree = ET.parse(filename)
        objs = tree.findall('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, len(self.classes)), dtype=np.float32)
        # "Seg" area for pascal is just the box area.
        seg_areas = np.zeros((num_objs), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            bbox = obj.find('bndbox')
            # Make pixel indexes 0-based.
            x1 = float(bbox.find('xmin').text) - 1
            y1 = float(bbox.find('ymin').text) - 1
            x2 = float(bbox.find('xmax').text) - 1
            y2 = float(bbox.find('ymax').text) - 1
            cls = self._class_to_ind[obj.find('name').text.lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': seg_areas
        }

if __name__ == '__main__':
    # Smoke test: load the validation split and print a single minibatch.
    layer = DataLayer("./dataset/val")
    batch = layer.forward()
    print(batch)