# lib/proposal_target_layer.py (branch: master)

# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick, Sean Bell and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import numpy.random as npr
from config import cfg
from lib.bbox_transform import bbox_transform
from lib.bbox import bbox_overlaps

import torch


def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """Sample proposals and build their classification / regression targets.

    The candidate pool is ``rpn_rois`` (optionally extended with the
    ground-truth boxes), from which ``_sample_rois`` draws a mix of foreground
    and background RoIs. The sampled tensors are then reshaped for the
    detection head.

    Args:
        rpn_rois: Candidate RoIs, one row per box laid out as
            (0, x1, y1, x2, y2).
        rpn_scores: Scores of the candidates. When ``cfg.TRAIN.USE_GT`` is
            set, a (num_gt, 1) block of zeros is concatenated to it along
            dim 0, so it is assumed to be (N, 1) -- TODO confirm with caller.
        gt_boxes: Ground-truth boxes. All columns except the last are used as
            box coordinates here; ``_sample_rois`` reads column 4 as the
            class label.
        _num_classes: Number of classes K; regression targets have 4 * K
            columns.

    Returns:
        Tuple ``(rois, roi_scores, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)`` viewed as (-1, 5), (-1,), (-1, 1),
        (-1, 4 * K), (-1, 4 * K) and (-1, 4 * K) respectively. The outside
        weights are 1.0 wherever the inside weights are positive, else 0.0.
    """
    # Proposal RoIs come from the RPN (or any other source).
    candidate_rois, candidate_scores = rpn_rois, rpn_scores

    if cfg.TRAIN.USE_GT:
        # Turn each ground-truth box into a RoI row by prepending a zero
        # column, then append those rows to the candidates.
        zero_col = rpn_rois.new_zeros(gt_boxes.shape[0], 1)
        gt_as_rois = torch.cat((zero_col, gt_boxes[:, :-1]), 1)
        candidate_rois = torch.cat((candidate_rois, gt_as_rois), 0)
        # Ground-truth boxes have no RPN score; they are given a score of 0.
        # NOTE(review): the original author was unsure this is a sensible
        # choice and did not use the appended scores.
        candidate_scores = torch.cat((candidate_scores, zero_col), 0)

    num_images = 1
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = int(round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Sample RoIs together with their labels and box-regression targets.
    sampled = _sample_rois(
        candidate_rois, candidate_scores, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = sampled

    target_width = _num_classes * 4
    rois = rois.view(-1, 5)
    roi_scores = roi_scores.view(-1)
    labels = labels.view(-1, 1)
    bbox_targets = bbox_targets.view(-1, target_width)
    bbox_inside_weights = bbox_inside_weights.view(-1, target_width)
    # Entries with a positive inside weight contribute to the loss.
    bbox_outside_weights = (bbox_inside_weights > 0).float()

    return (rois, roi_scores, labels, bbox_targets, bbox_inside_weights,
            bbox_outside_weights)


def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Bounding-box regression targets (bbox_target_data) are stored in a
  compact form N x (class, tx, ty, tw, th)

  This function expands those targets into the 4-of-4*K representation used
  by the network (i.e. only one class has non-zero targets).

  Returns:
      bbox_target (ndarray): N x 4K blob of regression targets
      bbox_inside_weights (ndarray): N x 4K blob of loss weights
  """
    # Inputs are tensor

    clss = bbox_target_data[:, 0]
    bbox_targets = clss.new_zeros(clss.numel(), 4 * num_classes)
    bbox_inside_weights = clss.new_zeros(bbox_targets.shape)
    inds = (clss > 0).nonzero().view(-1)
    if inds.numel() > 0:
        clss = clss[inds].contiguous().view(-1, 1)
        dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4)
        dim2_inds = torch.cat(
            [4 * clss, 4 * clss + 1, 4 * clss + 2, 4 * clss + 3], 1).long()
        bbox_targets[dim1_inds, dim2_inds] = bbox_target_data[inds][:, 1:]
        bbox_inside_weights[dim1_inds, dim2_inds] = bbox_targets.new(
            cfg.TRAIN.BBOX_INSIDE_WEIGHTS).view(-1, 4).expand_as(dim1_inds)

    return bbox_targets, bbox_inside_weights


def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""
    # Inputs are tensor

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - targets.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) /
                   targets.new(cfg.TRAIN.BBOX_NORMALIZE_STDS))
    return torch.cat([labels.unsqueeze(1), targets], 1)


def _choose_indices(candidates, count, device):
    """Randomly draw ``count`` entries from the 1-D index tensor ``candidates``.

    Entries are drawn without replacement unless fewer candidates than
    ``count`` are available; in that case duplicates are allowed so that the
    requested number of entries is always returned.
    """
    with_replacement = candidates.numel() < count
    picks = npr.choice(
        np.arange(0, candidates.numel()),
        size=int(count),
        replace=with_replacement)
    return candidates[torch.from_numpy(picks).long().to(device)]


def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: Candidate RoIs; columns 1..4 are used as box coordinates.
        all_scores: Scores aligned row-by-row with ``all_rois``.
        gt_boxes: Ground-truth boxes; columns 0..3 are used as coordinates
            and column 4 as the class label.
        fg_rois_per_image: Maximum number of foreground RoIs to sample.
        rois_per_image: Total number of RoIs to sample (may be a float with
            an integral value; it is truncated with ``int`` where needed).
        num_classes: Number of classes K for the expanded regression targets.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground rows first.
        Labels of the background rows are set to 0.

    Raises:
        RuntimeError: If no RoI qualifies as foreground or background, so
            nothing can be sampled.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(all_rois[:, 1:5].data, gt_boxes[:, :4].data)
    max_overlaps, gt_assignment = overlaps.max(1)
    labels = gt_boxes[gt_assignment, [4]]

    # Foreground RoIs are those with >= FG_THRESH overlap.
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)

    # Background RoIs are those with overlap in (0, BG_THRESH_HI).
    # NOTE: the upstream version used `max_overlaps >= cfg.TRAIN.BG_THRESH_LO`
    # as the lower bound; this variant requires a strictly positive overlap.
    # The two masks are combined with `&` rather than `(a + b) == 2`: on
    # boolean masks the sum stays boolean and never equals 2, which would
    # leave the background set permanently empty.
    bg_mask = (max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps > 0)
    bg_inds = bg_mask.nonzero().view(-1)

    # Small modification to the original version: a fixed number of regions
    # is always sampled, drawing with replacement when one pool is too small.
    device = gt_boxes.device
    has_fg = fg_inds.numel() > 0
    has_bg = bg_inds.numel() > 0
    if has_fg and has_bg:
        # Guard against images with fewer than fg_rois_per_image foreground
        # RoIs; the remainder of the batch is filled with background.
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = _choose_indices(fg_inds, fg_rois_per_image, device)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        bg_inds = _choose_indices(bg_inds, bg_rois_per_image, device)
    elif has_fg:
        fg_inds = _choose_indices(fg_inds, rois_per_image, device)
        fg_rois_per_image = rois_per_image
    elif has_bg:
        bg_inds = _choose_indices(bg_inds, rois_per_image, device)
        fg_rois_per_image = 0
    else:
        # Previously this dropped into pdb, which stalls unattended training.
        raise RuntimeError(
            '_sample_rois: no foreground or background RoIs to sample '
            '(num_rois=%d, num_gt_boxes=%d)'
            % (all_rois.shape[0], gt_boxes.shape[0]))

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data,
        labels.data)

    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
        bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights