Source code for easycv.core.evaluation.custom_cocotools.cocoeval

# Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
# All rights reserved.
__author__ = 'tsungyi'

import copy
import datetime
import logging
import os
import time
from collections import defaultdict

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from xtcocotools import mask as maskUtils

matplotlib.use('Agg')


[docs]class COCOeval:
    # Interface for evaluating detection on the Microsoft COCO dataset.
    #
    # The usage for CocoEval is as follows:
    #  cocoGt=..., cocoDt=...       # load dataset and results
    #  E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
    #  E.params.recThrs = ...;      # set parameters as desired
    #  E.evaluate();                # run per image evaluation
    #  E.accumulate();              # accumulate per image results
    #  E.summarize();               # display summary metrics of results
    # For example usage see evalDemo.m and http://mscoco.org/.
    #
    # The evaluation parameters are as follows (defaults in brackets):
    #  imgIds     - [all] N img ids to use for evaluation
    #  catIds     - [all] K cat ids to use for evaluation
    #  iouThrs    - [.5:.05:.95] T=10 IoU thresholds for evaluation
    #  recThrs    - [0:.01:1] R=101 recall thresholds for evaluation
    #  areaRng    - [...] A=4 object area ranges for evaluation
    #  maxDets    - [1 10 100] M=3 thresholds on max detections per image
    #  iouType    - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
    #  iouType replaced the now DEPRECATED useSegm parameter.
    #  useCats    - [1] if true use category labels for evaluation
    # Note: if useCats=0 category labels are ignored as in proposal scoring.
    # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
    #
    # evaluate(): evaluates detections on every image and every category and
    # concats the results into the "evalImgs" with fields:
    #  dtIds      - [1xD] id for each of the D detections (dt)
    #  gtIds      - [1xG] id for each of the G ground truths (gt)
    #  dtMatches  - [TxD] matching gt id at each IoU or 0
    #  gtMatches  - [TxG] matching dt id at each IoU or 0
    #  dtScores   - [1xD] confidence of each dt
    #  gtIgnore   - [1xG] ignore flag for each gt
    #  dtIgnore   - [TxD] ignore flag for each dt at each IoU
    #
    # accumulate(): accumulates the per-image, per-category evaluation
    # results in "evalImgs" into the dictionary "eval" with fields:
    #  params     - parameters used for evaluation
    #  date       - date evaluation was performed
    #  counts     - [T,R,K,A,M] parameter dimensions (see above)
    #  precision  - [TxRxKxAxM] precision for every evaluation setting
    #  recall     - [TxKxAxM] max recall for every evaluation setting
    # Note: precision and recall==-1 for settings with no gt objects.
    #
    # summarize(): computes and displays 12 summary metrics based on the
    # "eval" struct. Note that summarize() assumes the evaluation was
    # computed with certain default params (including default area ranges),
    # if not, the display may show NaN outputs for certain metrics. Results
    # of summarize() are stored in a 12 element vector "stats".
    #
    # analyze(): generates plots with detailed breakdown of false positives.
    # Inspired by "Diagnosing Error in Object Detectors" by D. Hoiem et al.
    # Generates one plot per category (80), supercategory (12), and overall
    # (1), multiplied by 4 scales, for a total of (80+12+1)*4=372 plots. Each
    # plo# t contains a series of precision recall curves where each PR curve
    # is guaranteed to be strictly higher than the previous as the evaluation
    # setting becomes more permissive. These plots give insight into errors
    # made by a detector. A more detailed description is given at mscoco.org.
    # Note: analyze() is quite slow as it calls evaluate() multiple times.
    # See also coco, mask, pycocoDemo, pycocoEvalDemo
    #
    # Microsoft COCO Toolbox.      version 2.0
    # Data, paper, and tutorials available at:  http://mscoco.org/
    # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
    # Licensed under the Simplified BSD License [see coco/license.txt]
[docs]    def __init__(self, cocoGt=None, cocoDt=None, iouType='segm', sigmas=None):
        '''
        Initialize CocoEval using coco APIs for gt and dt
        Args:
          cocoGt: coco object with ground truth annotations
          cocoDt: coco object with detection results
          iouType:  type of iou to be computed, bbox for detection task,
            segm for segmentation task
          sigmas: keypoint labelling sigmas.
        Return:
          None
        '''
        if not iouType:
            logging.info('iouType not specified. use default iouType segm')
        if sigmas is not None:
            self.sigmas = sigmas
        else:
            # The default sigmas are used for COCO dataset.
            self.sigmas = np.array([
                .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
                1.07, .87, .87, .89, .89
            ]) / 10.0
        self.cocoGt = cocoGt  # ground truth COCO API
        self.cocoDt = cocoDt  # detections COCO API
        self.params = {}  # evaluation parameters
        self.evalImgs = defaultdict(
            list)  # per-image per-category evaluation results [KxAxI] elements
        self.eval = {}  # accumulated evaluation results
        self._gts = defaultdict(list)  # gt for evaluation
        self._dts = defaultdict(list)  # dt for evaluation
        self.params = Params(iouType=iouType)  # parameters
        self._paramsEval = {}  # parameters for evaluation
        self.stats = []  # result summarization
        self.ious = {}  # ious between all gts and dts
        if cocoGt is not None:
            self.params.imgIds = sorted(cocoGt.getImgIds())
            self.params.catIds = sorted(cocoGt.getCatIds())

    def _prepare(self):
        '''
        Prepare ._gts and ._dts for evaluation based on params
        Return:
          None
        '''

        def _toMask(anns, coco):
            # modify ann['segmentation'] by reference
            for ann in anns:
                rle = coco.annToRLE(ann)
                ann['segmentation'] = rle

        p = self.params
        if p.useCats:
            gts = self.cocoGt.loadAnns(
                self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
            dts = self.cocoDt.loadAnns(
                self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
        else:
            if len(p.keepDtCatIds) > 0:
                dts = self.cocoDt.loadAnns(
                    self.cocoDt.getAnnIds(
                        imgIds=p.imgIds, catIds=p.keepDtCatIds))
            else:
                dts = self.cocoDt.loadAnns(
                    self.cocoDt.getAnnIds(imgIds=p.imgIds))

            if len(p.keepGtCatIds) > 0:
                gts = self.cocoGt.loadAnns(
                    self.cocoGt.getAnnIds(
                        imgIds=p.imgIds, catIds=p.keepGtCatIds))
            else:
                gts = self.cocoGt.loadAnns(
                    self.cocoGt.getAnnIds(imgIds=p.imgIds))

        # convert ground truth to mask if iouType == 'segm'
        if p.iouType == 'segm':
            _toMask(gts, self.cocoGt)
            _toMask(dts, self.cocoDt)
        # set ignore flag
        for gt in gts:
            gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0
            gt['ignore'] = 'iscrowd' in gt and gt['iscrowd']
            if p.iouType == 'keypoints':
                gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore']
        self._gts = defaultdict(list)  # gt for evaluation
        self._dts = defaultdict(list)  # dt for evaluation
        for gt in gts:
            self._gts[gt['image_id'], gt['category_id']].append(gt)
        for dt in dts:
            self._dts[dt['image_id'], dt['category_id']].append(dt)
        self.evalImgs = defaultdict(
            list)  # per-image per-category evaluation results
        self.eval = {}  # accumulated evaluation results

[docs]    def evaluate(self):
        '''
        Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
        Return:
          None
        '''
        tic = time.time()
        logging.info('Running per image evaluation...')
        p = self.params
        # add backward compatibility if useSegm is specified in params
        if p.useSegm is not None:
            p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
            logging.info(
                'useSegm (deprecated) is not None. Running {} evaluation'.
                format(p.iouType))
        logging.info('Evaluate annotation type *{}*'.format(p.iouType))
        p.imgIds = list(np.unique(p.imgIds))
        if p.useCats:
            p.catIds = list(np.unique(p.catIds))
        p.maxDets = sorted(p.maxDets)
        self.params = p

        self._prepare()
        # loop through images, area range, max detection number
        catIds = p.catIds if p.useCats else [-1]

        if p.iouType == 'segm' or p.iouType == 'bbox':
            computeIoU = self.computeIoU
        elif p.iouType == 'keypoints':
            computeIoU = self.computeOks
        self.ious = {(imgId, catId): computeIoU(imgId, catId)
                     for imgId in p.imgIds for catId in catIds}

        evaluateImg = self.evaluateImg
        maxDet = p.maxDets[-1]
        self.evalImgs = [
            evaluateImg(imgId, catId, areaRng, maxDet) for catId in catIds
            for areaRng in p.areaRng for imgId in p.imgIds
        ]
        self._paramsEval = copy.deepcopy(self.params)
        toc = time.time()
        logging.info('Per image evaluation DONE (t={:0.2f}s).'.format(toc -
                                                                      tic))

[docs]    def computeIoU(self, imgId, catId):
        p = self.params
        if p.useCats:
            gt = self._gts[imgId, catId]
            dt = self._dts[imgId, catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
        if len(gt) == 0 and len(dt) == 0:
            return []
        inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
        dt = [dt[i] for i in inds]
        if len(dt) > p.maxDets[-1]:
            dt = dt[0:p.maxDets[-1]]

        if p.iouType == 'segm':
            g = [g['segmentation'] for g in gt]
            d = [d['segmentation'] for d in dt]
        elif p.iouType == 'bbox':
            g = [g['bbox'] for g in gt]
            d = [d['bbox'] for d in dt]
        else:
            raise Exception('unknown iouType for iou computation')

        # compute iou between each dt and gt region
        iscrowd = [int(o['iscrowd']) for o in gt]
        ious = maskUtils.iou(d, g, iscrowd)
        return ious

[docs]    def computeOks(self, imgId, catId):
        p = self.params
        # dimention here should be Nxm
        gts = self._gts[imgId, catId]
        dts = self._dts[imgId, catId]
        inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
        dts = [dts[i] for i in inds]
        if len(dts) > p.maxDets[-1]:
            dts = dts[0:p.maxDets[-1]]
        # if len(gts) == 0 and len(dts) == 0:
        if len(gts) == 0 or len(dts) == 0:
            return []
        ious = np.zeros((len(dts), len(gts)))
        sigmas = self.sigmas
        vars = (sigmas * 2)**2
        k = len(sigmas)
        # compute oks between each detection and ground truth object
        for j, gt in enumerate(gts):
            # create bounds for ignore regions(double the gt bbox)
            g = np.array(gt['keypoints'])
            xg = g[0::3]
            yg = g[1::3]
            vg = g[2::3]
            k1 = np.count_nonzero(vg > 0)
            bb = gt['bbox']
            x0 = bb[0] - bb[2]
            x1 = bb[0] + bb[2] * 2
            y0 = bb[1] - bb[3]
            y1 = bb[1] + bb[3] * 2
            for i, dt in enumerate(dts):
                d = np.array(dt['keypoints'])
                xd = d[0::3]
                yd = d[1::3]
                if k1 > 0:
                    # measure the per-keypoint distance if keypoints visible
                    dx = xd - xg
                    dy = yd - yg
                else:
                    # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                    z = np.zeros((k))
                    dx = np.max((z, x0 - xd), axis=0) + np.max(
                        (z, xd - x1), axis=0)
                    dy = np.max((z, y0 - yd), axis=0) + np.max(
                        (z, yd - y1), axis=0)
                e = (dx**2 + dy**2) / vars / (gt['area'] + np.spacing(1)) / 2
                if k1 > 0:
                    e = e[vg > 0]
                ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
        return ious

[docs]    def evaluateImg(self, imgId, catId, aRng, maxDet):
        '''
        perform evaluation for single category and image
        Args:
          imgId:  image id, string
          catId: category id, string
          aRng:  area range, tuple
          maxDet:  maximum detection number
        Return:
          dict (single image results)
        '''
        p = self.params
        if p.useCats:
            gt = self._gts[imgId, catId]
            dt = self._dts[imgId, catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
        if len(gt) == 0 and len(dt) == 0:
            return None
        for g in gt:
            if g['ignore'] or (g['area'] < aRng[0] or g['area'] > aRng[1]):
                g['_ignore'] = 1
            else:
                g['_ignore'] = 0

            # used to fast ignore some gt, avoiding changing the original gt
            if p.targetCatId >= 0 and g['category_id'] != p.targetCatId:
                g['_ignore'] = 1

        # sort dt highest score first, sort gt ignore last
        gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
        gt = [gt[i] for i in gtind]
        dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
        dt = [dt[i] for i in dtind[0:maxDet]]
        iscrowd = [int(o['iscrowd']) for o in gt]
        # load computed ious
        ious = self.ious[imgId, catId][:, gtind] if len(
            self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]

        T = len(p.iouThrs)
        G = len(gt)
        D = len(dt)
        gtm = np.zeros((T, G))
        dtm = np.zeros((T, D))
        gtIg = np.array([g['_ignore'] for g in gt])
        dtIg = np.zeros((T, D))
        if not len(ious) == 0:
            for tind, t in enumerate(p.iouThrs):
                for dind, d in enumerate(dt):
                    # information about best match so far (m=-1 -> unmatched)
                    iou = min([t, 1 - 1e-10])
                    m = -1
                    for gind, g in enumerate(gt):
                        # if this gt already matched, and not a crowd, continue
                        if gtm[tind, gind] > 0 and not iscrowd[gind]:
                            continue
                        # if dt matched to reg gt, and on ignore gt, stop
                        if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
                            break
                        # continue to next gt unless better match made
                        if ious[dind, gind] < iou:
                            continue
                        # if match successful and best so far, store appropriately
                        iou = ious[dind, gind]
                        m = gind
                    # if match made store id of match for both dt and gt
                    if m == -1:
                        continue
                    dtIg[tind, dind] = gtIg[m]
                    dtm[tind, dind] = gt[m]['id']
                    gtm[tind, m] = d['id']
        # set unmatched detections outside of area range to ignore
        a = np.array([d['area'] < aRng[0] or d['area'] > aRng[1]
                      for d in dt]).reshape((1, len(dt)))
        dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T,
                                                                      0)))
        # store results for given image and category
        return {
            'image_id': imgId,
            'category_id': catId,
            'aRng': aRng,
            'maxDet': maxDet,
            'dtIds': [d['id'] for d in dt],
            'gtIds': [g['id'] for g in gt],
            'dtMatches': dtm,
            'gtMatches': gtm,
            'dtScores': [d['score'] for d in dt],
            'gtIgnore': gtIg,
            'dtIgnore': dtIg,
        }

[docs]    def accumulate(self, p=None):
        '''
        Accumulate per image evaluation results and store the result in self.eval
        Args:
          param p: input params for evaluation
        Return:
          None
        '''
        logging.info('Accumulating evaluation results...')
        tic = time.time()
        if not self.evalImgs:
            logging.info('Please run evaluate() first')
        # allows input customized parameters
        if p is None:
            p = self.params
        p.catIds = p.catIds if p.useCats == 1 else [-1]
        T = len(p.iouThrs)
        R = len(p.recThrs)
        K = len(p.catIds) if p.useCats else 1
        A = len(p.areaRng)
        M = len(p.maxDets)
        precision = -np.ones(
            (T, R, K, A, M))  # -1 for the precision of absent categories
        recall = -np.ones((T, K, A, M))
        scores = -np.ones((T, R, K, A, M))

        # create dictionary for future indexing
        _pe = self._paramsEval
        catIds = _pe.catIds if _pe.useCats else [-1]
        setK = set(catIds)
        setA = set(map(tuple, _pe.areaRng))
        setM = set(_pe.maxDets)
        setI = set(_pe.imgIds)
        # get inds to evaluate
        k_list = [n for n, k in enumerate(p.catIds) if k in setK]
        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        a_list = [
            n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng))
            if a in setA
        ]
        i_list = [n for n, i in enumerate(p.imgIds) if i in setI]

        I0 = len(_pe.imgIds)
        A0 = len(_pe.areaRng)
        # retrieve E at each category, area range, and max number of detections
        for k, k0 in enumerate(k_list):
            Nk = k0 * A0 * I0
            for a, a0 in enumerate(a_list):
                Na = a0 * I0
                for m, maxDet in enumerate(m_list):
                    E = [self.evalImgs[Nk + Na + i] for i in i_list]
                    E = [e for e in E if e is not None]
                    if len(E) == 0:
                        continue
                    dtScores = np.concatenate(
                        [e['dtScores'][0:maxDet] for e in E])

                    # different sorting method generates slightly different results.
                    # mergesort is used to be consistent as Matlab implementation.
                    inds = np.argsort(-dtScores, kind='mergesort')
                    dtScoresSorted = dtScores[inds]

                    dtm = np.concatenate(
                        [e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:,
                                                                          inds]
                    dtIg = np.concatenate(
                        [e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:,
                                                                         inds]
                    gtIg = np.concatenate([e['gtIgnore'] for e in E])
                    npig = np.count_nonzero(gtIg == 0)
                    if npig == 0:
                        continue
                    tps = np.logical_and(dtm, np.logical_not(dtIg))
                    fps = np.logical_and(
                        np.logical_not(dtm), np.logical_not(dtIg))

                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32)
                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32)
                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                        tp = np.array(tp)
                        fp = np.array(fp)
                        nd = len(tp)
                        rc = tp / npig
                        pr = tp / (fp + tp + np.spacing(1))
                        q = np.zeros((R, ))
                        ss = np.zeros((R, ))

                        if nd:
                            recall[t, k, a, m] = rc[-1]
                        else:
                            recall[t, k, a, m] = 0

                        # numpy is slow without cython optimization for accessing elements
                        # use python array gets significant speed improvement
                        pr = pr.tolist()
                        q = q.tolist()

                        for i in range(nd - 1, 0, -1):
                            if pr[i] > pr[i - 1]:
                                pr[i - 1] = pr[i]

                        inds = np.searchsorted(rc, p.recThrs, side='left')
                        try:
                            for ri, pi in enumerate(inds):
                                q[ri] = pr[pi]
                                ss[ri] = dtScoresSorted[pi]
                        except:
                            pass
                        precision[t, :, k, a, m] = np.array(q)
                        scores[t, :, k, a, m] = np.array(ss)
        self.eval = {
            'params': p,
            'counts': [T, R, K, A, M],
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'precision': precision,
            'recall': recall,
            'scores': scores,
        }
        toc = time.time()
        logging.info('Accumulating DONE (t={:0.2f}s).'.format(toc - tic))

[docs]    def summarize(self):
        '''
        Compute and display summary metrics for evaluation results.
        Note this functin can *only* be applied on the default parameter setting
        '''

        def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
            p = self.params
            iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
            titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
            typeStr = '(AP)' if ap == 1 else '(AR)'
            iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
                if iouThr is None else '{:0.2f}'.format(iouThr)

            aind = [
                i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng
            ]
            mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
            if ap == 1:
                # dimension of precision: [TxRxKxAxM]
                s = self.eval['precision']
                # IoU
                if iouThr is not None:
                    t = np.where(iouThr == p.iouThrs)[0]
                    s = s[t]
                s = s[:, :, :, aind, mind]
            else:
                # dimension of recall: [TxKxAxM]
                s = self.eval['recall']
                if iouThr is not None:
                    t = np.where(iouThr == p.iouThrs)[0]
                    s = s[t]
                s = s[:, :, aind, mind]
            if len(s[s > -1]) == 0:
                mean_s = -1
            else:
                mean_s = np.mean(s[s > -1])
            logging.info(
                iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets,
                            mean_s))
            return mean_s

        def _summarizeDets():
            stats = np.zeros((12, ))
            stats[0] = _summarize(1)
            stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
            stats[2] = _summarize(
                1, iouThr=.75, maxDets=self.params.maxDets[2])
            stats[3] = _summarize(
                1, areaRng='small', maxDets=self.params.maxDets[2])
            stats[4] = _summarize(
                1, areaRng='medium', maxDets=self.params.maxDets[2])
            stats[5] = _summarize(
                1, areaRng='large', maxDets=self.params.maxDets[2])
            stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
            stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
            stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
            stats[9] = _summarize(
                0, areaRng='small', maxDets=self.params.maxDets[2])
            stats[10] = _summarize(
                0, areaRng='medium', maxDets=self.params.maxDets[2])
            stats[11] = _summarize(
                0, areaRng='large', maxDets=self.params.maxDets[2])
            return stats

        def _summarizeKps():
            stats = np.zeros((10, ))
            stats[0] = _summarize(1, maxDets=20)
            stats[1] = _summarize(1, maxDets=20, iouThr=.5)
            stats[2] = _summarize(1, maxDets=20, iouThr=.75)
            stats[3] = _summarize(1, maxDets=20, areaRng='medium')
            stats[4] = _summarize(1, maxDets=20, areaRng='large')
            stats[5] = _summarize(0, maxDets=20)
            stats[6] = _summarize(0, maxDets=20, iouThr=.5)
            stats[7] = _summarize(0, maxDets=20, iouThr=.75)
            stats[8] = _summarize(0, maxDets=20, areaRng='medium')
            stats[9] = _summarize(0, maxDets=20, areaRng='large')
            return stats

        if not self.eval:
            raise Exception('Please run accumulate() first')
        iouType = self.params.iouType
        if iouType == 'segm' or iouType == 'bbox':
            summarize = _summarizeDets
        elif iouType == 'keypoints':
            summarize = _summarizeKps
        self.stats = summarize()

    def __str__(self):
        self.summarize()

    # add for metric per category end here
[docs]    def summarize_per_category(self):
        '''
        Compute and display summary metrics for evaluation results *per category*.
        Note this functin can *only* be applied on the default parameter setting
        '''

        def _summarize_single_category(ap=1,
                                       iouThr=None,
                                       categoryId=None,
                                       areaRng='all',
                                       maxDets=100):
            p = self.params
            iStr = ' {:<18} {} @[ CategoryId={:>3d} | IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
            titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
            typeStr = '(AP)' if ap == 1 else '(AR)'
            iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
                if iouThr is None else '{:0.2f}'.format(iouThr)

            aind = [
                i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng
            ]
            mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

            if ap == 1:
                # dimension of precision: [TxRxKxAxM]
                s = self.eval['precision']
                # IoU
                if iouThr is not None:
                    t = np.where(iouThr == p.iouThrs)[0]
                    s = s[t]
                if categoryId is not None:
                    category_index = [
                        i for i, i_catId in enumerate(p.catIds)
                        if i_catId == categoryId
                    ]
                    s = s[:, :, category_index, aind, mind]
                else:
                    s = s[:, :, :, aind, mind]
            else:
                # dimension of recall: [TxKxAxM]
                s = self.eval['recall']
                if iouThr is not None:
                    t = np.where(iouThr == p.iouThrs)[0]
                    s = s[t]
                if categoryId is not None:
                    category_index = [
                        i for i, i_catId in enumerate(p.catIds)
                        if i_catId == categoryId
                    ]
                    s = s[:, category_index, aind, mind]
                else:
                    s = s[:, :, aind, mind]
            if len(s[s > -1]) == 0:
                mean_s = -1
            else:
                mean_s = np.mean(s[s > -1])
            # print(iStr.format(titleStr, typeStr, catId, iouStr, areaRng, maxDets, mean_s))
            return mean_s

        def _summarizeDets_per_category():
            category_stats = np.zeros((12, len(self.params.catIds)))
            for category_index, category_id in enumerate(self.params.catIds):
                category_stats[0][category_index] = _summarize_single_category(
                    1, categoryId=category_id)
                category_stats[1][category_index] = _summarize_single_category(
                    1,
                    iouThr=.5,
                    maxDets=self.params.maxDets[2],
                    categoryId=category_id)
                category_stats[2][category_index] = _summarize_single_category(
                    1,
                    iouThr=.75,
                    maxDets=self.params.maxDets[2],
                    categoryId=category_id)
                category_stats[3][category_index] = _summarize_single_category(
                    1,
                    areaRng='small',
                    maxDets=self.params.maxDets[2],
                    categoryId=category_id)
                category_stats[4][category_index] = _summarize_single_category(
                    1,
                    areaRng='medium',
                    maxDets=self.params.maxDets[2],
                    categoryId=category_id)
                category_stats[5][category_index] = _summarize_single_category(
                    1,
                    areaRng='large',
                    maxDets=self.params.maxDets[2],
                    categoryId=category_id)
                category_stats[6][category_index] = _summarize_single_category(
                    0, maxDets=self.params.maxDets[0], categoryId=category_id)
                category_stats[7][category_index] = _summarize_single_category(
                    0, maxDets=self.params.maxDets[1], categoryId=category_id)
                category_stats[8][category_index] = _summarize_single_category(
                    0, maxDets=self.params.maxDets[2], categoryId=category_id)
                category_stats[9][category_index] = _summarize_single_category(
                    0,
                    areaRng='small',
                    maxDets=self.params.maxDets[2],
                    categoryId=category_id)
                category_stats[10][
                    category_index] = _summarize_single_category(
                        0,
                        areaRng='medium',
                        maxDets=self.params.maxDets[2],
                        categoryId=category_id)
                category_stats[11][
                    category_index] = _summarize_single_category(
                        0,
                        areaRng='large',
                        maxDets=self.params.maxDets[2],
                        categoryId=category_id)
            return category_stats

        def _summarizeKps_per_category():
            category_stats = np.zeros((10, len(self.params.catIds)))
            for category_index, category_id in self.params.catIds:
                category_stats[0][category_index] = _summarize_single_category(
                    1, maxDets=20, categoryId=category_id)
                category_stats[1][category_index] = _summarize_single_category(
                    1, maxDets=20, iouThr=.5, categoryId=category_id)
                category_stats[2][category_index] = _summarize_single_category(
                    1, maxDets=20, iouThr=.75, categoryId=category_id)
                category_stats[3][category_index] = _summarize_single_category(
                    1, maxDets=20, areaRng='medium', categoryId=category_id)
                category_stats[4][category_index] = _summarize_single_category(
                    1, maxDets=20, areaRng='large', categoryId=category_id)
                category_stats[5][category_index] = _summarize_single_category(
                    0, maxDets=20, categoryId=category_id)
                category_stats[6][category_index] = _summarize_single_category(
                    0, maxDets=20, iouThr=.5, categoryId=category_id)
                category_stats[7][category_index] = _summarize_single_category(
                    0, maxDets=20, iouThr=.75, categoryId=category_id)
                category_stats[8][category_index] = _summarize_single_category(
                    0, maxDets=20, areaRng='medium', categoryId=category_id)
                category_stats[9][category_index] = _summarize_single_category(
                    0, maxDets=20, areaRng='large', categoryId=category_id)
            return category_stats

        if not self.eval:
            raise Exception('Please run accumulate() first')
        iouType = self.params.iouType
        if iouType == 'segm' or iouType == 'bbox':
            summarize_per_category = _summarizeDets_per_category
        elif iouType == 'keypoints':
            summarize_per_category = _summarizeKps_per_category
        self.category_stats = summarize_per_category()

[docs]    def filter_annotations(self, annotations, catIds):
        if hasattr(catIds, '__iter__') or hasattr(catIds, '__len__'):
            catIds = catIds
        else:
            catIds = [catIds]

        new_annotations = []
        for anno in annotations:
            if anno['category_id'] in catIds:
                new_annotations.append(anno)

        return new_annotations

[docs]    def makeplot(self, recThrs, precisions, name, save_dir=None):
        if save_dir is not None and not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # precisions  7x101x4x1]
        assert precisions.shape[2] == 4
        areaNms = ['all', 'small', 'medium', 'large']
        figures_np = {}
        for aidx in range(precisions.shape[2]):
            plotname = '%s-%s' % (name, areaNms[aidx])
            # ps shape 7x101
            ps = precisions[:, :, aidx, 0].squeeze()
            ap = np.round(np.mean(ps, axis=1) * 1000)

            ds = np.concatenate((np.expand_dims(ps[0],
                                                axis=0), np.diff(ps, axis=0)))
            colors = [[1.0, 1.0, 1.0], [0.5, 0.5, 0.5], [.31, .51, .74],
                      [.75, .31, .30], [.36, .90, .38], [.50, .39, .64],
                      [1.0, .6, 0.0]]
            # format legend str
            ls = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN']
            for i in range(precisions.shape[0]):
                if ap[i] == 1000:
                    ls[i] = '[1.00] ' + ls[i]
                else:
                    ls[i] = '[.%03d] %s' % (ap[i], ls[i])

            fig = plt.figure()
            plt.stackplot(recThrs, ds, labels=ls, colors=colors)
            plt.axis([0.0, 1.0, 0.0, 1.0])
            plt.legend(loc='lower left')
            plt.xlabel('recall')
            plt.ylabel('precision')
            plt.title(plotname)

            # if save_dir is set, figure will be saved to save_dir
            if save_dir is not None:
                save_file = '%s/%s.pdf' % (save_dir, plotname)
                logging.info('save figure to %s' % save_file)
                fig.savefig(save_file)
            fig.canvas.draw()
            data = np.fromstring(
                fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
            data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))
            figures_np[plotname] = data
            plt.close()

        return figures_np

[docs]    def analyze(self):
        '''
        Analyze errors
        '''
        prm = copy.deepcopy(self.params)
        self.params.maxDets = [100]
        catIds = sorted(self.cocoGt.getCatIds())
        self.params.catIds = catIds
        self.params.iouThrs = np.array([0.75, 0.5, 0.1])
        self.evaluate()
        self.accumulate()
        ps = self.eval['precision']

        class Type(object):
            C75 = 0
            C50 = 1
            Loc = 2
            Sim = 3
            Other = 4
            BG = 5
            FN = 6

        # 0 for C75
        # 1 for C50
        # 2 for Loc
        # 3 for Sim
        # 4 for Other
        # 5 for BG
        # 5 for FN
        ps = np.concatenate((ps, np.zeros([4] + list(ps.shape[1:]))))

        self.params.iouThrs = np.array([0.1])
        self.params.useCats = 0

        dt = self.cocoDt
        gt = self.cocoGt
        self.analyze_figures = {}
        for k, catId in enumerate(catIds):
            nm = self.cocoGt.loadCats(catId)[0]
            if 'supercategory' in nm:
                nm = nm['supercategory'] + '-' + nm['name']
            else:
                nm = nm['name']
            logging.info('Analyzing %s (%d):' % (nm, k))
            start_time = time.time()

            # select detections for single category only
            self.params.keepDtCatIds = [catId]

            # compute precision but ignore superclass confusion
            cur_cat = gt.loadCats(catId)[0]
            if 'supercategory' in cur_cat:
                similar_cat_ids = gt.getCatIds(supNms=cur_cat['supercategory'])
                self.params.keepGtCatIds = similar_cat_ids
                self.params.targetCatId = catId

                # computeIoU need real catIds, we need to recover it
                self.params.catIds = catIds
                self.evaluate()
                self.accumulate()
                ps[Type.Sim, :, k, :, :] = self.eval['precision'][0, :,
                                                                  0, :, :]
            else:
                # skip  superclass confusion evaluation
                ps[Type.Sim, :, k, :, :] = ps[2, :, k, :, :]

            # compute precision but ignore any class confusion
            self.params.targetCatId = catId
            self.params.keepGtCatIds = catIds
            self.params.catIds = catIds
            self.evaluate()
            self.accumulate()
            ps[Type.Other, :, k, :, :] = self.eval['precision'][0, :, 0, :, :]

            # fill in background and false negative errors and plot
            ps[ps == -1] = 0
            ps[Type.BG, :, k, :] = (ps[4, :, k, :] > 0).astype(np.float32)
            ps[Type.FN, :, k, :] = 1
            end_time = time.time()

            # test
            # dst_dir = 'py_precision'
            # if not os.path.exists(dst_dir):
            #     os.makedirs(dst_dir)
            # result_file = '%s/class_%d' % (dst_dir, k)
            # np.save(result_file, ps)

            figures = self.makeplot(
                self.params.recThrs,
                ps[:, :, k, :, :],
                nm,
                save_dir=self.params.outDir)
            self.analyze_figures.update(figures)
            logging.info('Analyzing DONE (t=%0.2fs).' %
                         (end_time - start_time))

        # reset Dt and Gt, params
        self.params = prm
        figures = self.makeplot(
            self.params.recThrs,
            np.mean(ps, axis=2),
            'overall-all',
            save_dir=self.params.outDir)
        self.analyze_figures.update(figures)
        if 'supercategory' in self.cocoGt.cats.values()[0]:
            sup = [
                cat['supercategory'] for cat in self.cocoGt.loadCats(catIds)
            ]
            logging.info('all sup cats: %s' % (set(sup)))
            for k in set(sup):
                ps1 = np.mean(ps[:, :, np.array(sup) == k, :, :], axis=2)
                figures = self.makeplot(
                    self.params.recThrs,
                    ps1,
                    'overall-%s' % k,
                    save_dir=self.params.outDir)
                self.analyze_figures.update(figures)


[docs]class Params:
    '''
    Params for coco evaluation api
    '''

[docs]    def setDetParams(self):
        self.imgIds = []
        self.catIds = []
        # np.arange causes trouble.  the data point on arange is slightly larger than the true value
        self.iouThrs = np.linspace(
            .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
        self.recThrs = np.linspace(
            .0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
        self.maxDets = [1, 10, 100]
        self.areaRng = [[0**2, 1e5**2], [0**2, 32**2], [32**2, 96**2],
                        [96**2, 1e5**2]]
        self.areaRngLbl = ['all', 'small', 'medium', 'large']
        self.useCats = 1

        # if set will filter categories in _prepare func
        self.keepDtCatIds = []
        self.keepGtCatIds = []
        # if set greater than 0, annotation whose catId != targetCatId  will be set to ignore
        self.targetCatId = -1

        self.outDir = None

[docs]    def setKpParams(self):
        self.imgIds = []
        self.catIds = []
        # np.arange causes trouble.  the data point on arange is slightly larger than the true value
        self.iouThrs = np.linspace(
            .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
        self.recThrs = np.linspace(
            .0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
        self.maxDets = [20]
        self.areaRng = [[0**2, 1e5**2], [32**2, 96**2], [96**2, 1e5**2]]
        self.areaRngLbl = ['all', 'medium', 'large']
        self.useCats = 1

        # if set will filter categories in _prepare func
        self.keepDtCatIds = []
        self.keepGtCatIds = []
        # if set greater than 0, annotation whose catId != targetCatId  will be set to ignore
        self.targetCatId = -1

        self.outDir = None

[docs]    def __init__(self, iouType='segm'):
        if iouType == 'segm' or iouType == 'bbox':
            self.setDetParams()
        elif iouType == 'keypoints':
            self.setKpParams()
        else:
            raise Exception('iouType not supported')
        self.iouType = iouType
        # useSegm is deprecated
        self.useSegm = None