import os
import glob
import cv2
import xml.etree.ElementTree as ET
import numpy as np
import scipy.sparse
import scipy.io as sio
import numpy.random as npr
import time
from config import cfg
# Target size (pixels) for the shorter image side; one entry per usable scale.
SCALES = (600,)
# Upper bound (pixels) on the longer image side after rescaling.
MAX_SIZE = 1000
# Per-channel mean values subtracted from every image (comes from project config;
# presumably in cv2's BGR channel order -- confirm against cfg).
PIXEL_MEANS = cfg.PIXEL_MEANS
class DataLayer(object):
    """In-memory data layer for a PASCAL-VOC-style dataset.

    Loads every image/annotation pair at construction time and serves
    single-image minibatches via :meth:`forward`.
    """

    def __init__(self, rootPath):
        """Load all samples found under *rootPath*.

        *rootPath* must contain an ``Annotations`` directory with PASCAL
        VOC XML files and a ``JPEGImages`` directory with the images.
        """
        self.xmlPath = os.path.join(rootPath, "Annotations")
        self.imgPath = os.path.join(rootPath, "JPEGImages")
        # Pointer to the next sample to serve; wraps around via modulo.
        self.point = 0
        self.classes = (
            '__background__',  # always index 0
            'balloon')
        # Map class name -> contiguous integer label.
        self._class_to_ind = {name: ind for ind, name in enumerate(self.classes)}
        self.images = []
        # NOTE(review): attribute keeps the original (misspelled) name
        # "annotaions" because other code may rely on it.
        self.annotaions = []
        self.filenames = []
        self._load()
        self.length = len(self.images)
# def refresh(self):
# self.point = 0 # 数据加载指针
# self.isOpen = True # 数据是否加载完,False代表已全部加载
def im_list_to_blob(self, ims):
"""Convert a list of images into a network input.
Assumes images are already prepared (means subtracted, BGR order, ...).
"""
max_shape = np.array([im.shape for im in ims]).max(axis=0)
num_images = len(ims)
blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), dtype=np.float32)
for i in range(num_images):
im = ims[i]
blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
return blob
def prep_im_for_blob(self, im, pixel_means, target_size, max_size):
"""Mean subtract and scale an image for use in a blob."""
im = im.astype(np.float32, copy=False)
im -= pixel_means
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > max_size:
im_scale = float(max_size) / float(im_size_max)
im = cv2.resize(
im,
None,
None,
fx=im_scale,
fy=im_scale,
interpolation=cv2.INTER_LINEAR)
return im, im_scale
def _get_image_blob(self, images, scale_inds):
"""Builds an input blob from the images in the roidb at the specified
scales.
"""
num_images = len(images)
processed_ims = []
im_scales = []
for i in range(num_images):
im = images[i]
target_size = SCALES[scale_inds[i]]
im, im_scale = self.prep_im_for_blob(im, PIXEL_MEANS, target_size, MAX_SIZE)
im_scales.append(im_scale)
processed_ims.append(im)
# Create a blob to hold the input images
blob = self.im_list_to_blob(processed_ims)
return blob, im_scales
def get_minibatch(self):
"""Given a roidb, construct a minibatch sampled from it."""
imageArray = np.asarray([self.images[self.point % self.length]])
annotationArray = self.annotaions[self.point % self.length]
num_images = len(imageArray)
# Sample random scales to use for each image in this batch
random_scale_inds = npr.randint(
0, high=len(SCALES), size=num_images)
# Get the input image blob, formatted for caffe
im_blob, im_scales = self._get_image_blob(imageArray, random_scale_inds)
assert len(im_scales) == 1, "Single batch only"
assert len(im_blob) == 1, "Single batch only"
blobs = {'data': im_blob}
# gt boxes: (x1, y1, x2, y2, cls)
gt_inds = np.where(annotationArray['gt_classes'] != 0)[0]
gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
gt_boxes[:, 0:4] = annotationArray['boxes'][gt_inds, :] * im_scales[0]
gt_boxes[:, 4] = annotationArray['gt_classes'][gt_inds]
blobs['gt_boxes'] = gt_boxes
blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32) # [height, width, im_scales]
return blobs
def forward(self):
"""Get blobs and copy them into this layer's top blob vector."""
blobs = self.get_minibatch()
self.point += 1
# if self.point >= len(self.images):
# self.isOpen = False
return blobs
def _load(self):
imageArray = []
annotationArray = []
for imgP in glob.glob(self.imgPath + "/*.*"):
# 读取图片,
img = cv2.imread(imgP) # 读取的是BGR格式
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # 将图片从 BGR 模式转为 RGB 模式
imageArray.append(img)
# 读取xml中的标签内容
xmlP = imgP.replace("JPEGImages", "Annotations")
xmlP = xmlP.replace("jpg", "xml")
annotation = self._load_pascal_annotation(xmlP)
annotationArray.append(annotation)
self.filenames.append(xmlP)
self.images = imageArray
self.annotaions = annotationArray
def _load_pascal_annotation(self, filename):
"""
Load image and bounding boxes info from XML file in the PASCAL VOC
format.
"""
tree = ET.parse(filename)
objs = tree.findall('object')
num_objs = len(objs)
boxes = np.zeros((num_objs, 4), dtype=np.float32)
gt_classes = np.zeros((num_objs), dtype=np.int32)
overlaps = np.zeros((num_objs, len(self.classes)), dtype=np.float32)
# "Seg" area for pascal is just the box area
seg_areas = np.zeros((num_objs), dtype=np.float32)
# Load object bounding boxes into a data frame.
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
# Make pixel indexes 0-based
x1 = float(bbox.find('xmin').text) - 1
y1 = float(bbox.find('ymin').text) - 1
x2 = float(bbox.find('xmax').text) - 1
y2 = float(bbox.find('ymax').text) - 1
cls = self._class_to_ind[obj.find('name').text.lower().strip()]
boxes[ix, :] = [x1, y1, x2, y2]
gt_classes[ix] = cls
overlaps[ix, cls] = 1.0
seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)
overlaps = scipy.sparse.csr_matrix(overlaps)
return {
'boxes': boxes,
'gt_classes': gt_classes,
'gt_overlaps': overlaps,
'flipped': False,
'seg_areas': seg_areas
}
if __name__ == '__main__':
    # Smoke test: load the validation set and print one minibatch.
    datalayer = DataLayer("./dataset/val")
    blobs = datalayer.forward()
    print(blobs)