# Copyright (c) OpenMMLab. All rights reserved.
# Adapted from https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
import json
import logging
import os
import numpy as np
from easycv.datasets.registry import DATASOURCES
from easycv.framework.errors import ValueError
from .top_down import PoseTopDownSource
OC_HUMAN_DATASET_INFO = dict(
dataset_name='OC HUMAN',
paper_info=dict(
author=
'Song-Hai Zhang, Ruilong Li, Xin Dong, Paul L. Rosin, Zixi Cai, Han Xi, Dingcheng Yang, Hao-Zhi Huang, Shi-Min Hu',
title='Pose2Seg: Detection Free Human Instance Segmentation',
container='Computer Vision and Pattern Recognition',
year='2019',
homepage='https://github.com/liruilong940607/OCHumanApi'),
keypoint_info={
0:
dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='left_eye',
id=1,
color=[51, 153, 255],
type='upper',
swap='right_eye'),
2:
dict(
name='right_eye',
id=2,
color=[51, 153, 255],
type='upper',
swap='left_eye'),
3:
dict(
name='left_ear',
id=3,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
4:
dict(
name='right_ear',
id=4,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='right_shoulder',
id=6,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
7:
dict(
name='left_elbow',
id=7,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
8:
dict(
name='right_elbow',
id=8,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
9:
dict(
name='left_wrist',
id=9,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='left_hip',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
12:
dict(
name='right_hip',
id=12,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
13:
dict(
name='left_knee',
id=13,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
14:
dict(
name='right_knee',
id=14,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
15:
dict(
name='left_ankle',
id=15,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
16:
dict(
name='right_ankle',
id=16,
color=[255, 128, 0],
type='lower',
swap='left_ankle')
},
skeleton_info={
0:
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
2:
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
6:
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
7:
dict(
link=('left_shoulder', 'right_shoulder'),
id=7,
color=[51, 153, 255]),
8:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
9:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
13:
dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
14:
dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
15:
dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
16:
dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
17:
dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
18:
dict(
link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
1.5
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
])
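
# A minimal sketch (not executed by this module) of how the ``swap`` fields
# above can be turned into left/right flip pairs, similar to what the
# downstream DatasetInfo parser derives:
#
#     name_to_id = {v['name']: k
#                   for k, v in OC_HUMAN_DATASET_INFO['keypoint_info'].items()}
#     flip_pairs = sorted({tuple(sorted((k, name_to_id[v['swap']])))
#                          for k, v in
#                          OC_HUMAN_DATASET_INFO['keypoint_info'].items()
#                          if v['swap']})
#     # -> [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16)]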
@DATASOURCES.register_module
class PoseTopDownSourceChHuman(PoseTopDownSource):
"""Oc Human Source for top-down pose estimation.
`Pose2Seg: Detection Free Human Instance Segmentation' ECCV'2019
More details can be found in the `paper
<https://arxiv.org/abs/1803.10683>`__ .
The source loads raw features to build a data meta object
containing the image info, annotation info and others.
Oc Human keypoint indexes::
0: 'nose',
1: 'left_eye',
2: 'right_eye',
3: 'left_ear',
4: 'right_ear',
5: 'left_shoulder',
6: 'right_shoulder',
7: 'left_elbow',
8: 'right_elbow',
9: 'left_wrist',
10: 'right_wrist',
11: 'left_hip',
12: 'right_hip',
13: 'left_knee',
14: 'right_knee',
15: 'left_ankle',
16: 'right_ankle'
Args:
ann_file (str): Path to the annotation file.
img_prefix (str): Path to a directory where images are held.
Default: None.
        data_cfg (dict): Config dict of the data source.
        subset (str): Controls how the annotation file is parsed. If subset
            is 'train', 'val' or 'test', the file is read as raw OCHuman
            (non-COCO-style) annotations and the image list is split 75%/25%
            into train and val/test portions; if subset is None, the file is
            read as COCO-style annotations. Default: None.
        dataset_info (DatasetInfo): A class containing all dataset info.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
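
    Example::

        # A minimal usage sketch; the paths below are placeholders and
        # data_cfg must carry the fields PoseTopDownSource expects
        # (e.g. image_size, heatmap_size, num_joints as in EasyCV's
        # top-down pose configs).
        data_cfg = dict(
            image_size=[192, 256],
            heatmap_size=[48, 64],
            num_output_channels=17,
            num_joints=17,
            dataset_channel=[list(range(17))],
            inference_channel=list(range(17)))
        source = PoseTopDownSourceChHuman(
            ann_file='data/ochuman/ochuman.json',
            img_prefix='data/ochuman/images/',
            data_cfg=data_cfg,
            subset='train')
        print(source.num_images)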
"""
    def __init__(self,
ann_file,
img_prefix,
data_cfg,
subset=None,
dataset_info=None,
test_mode=False,
**kwargs):
if dataset_info is None:
            logging.info(
                'dataset_info is missing, use default OCHuman dataset info')
dataset_info = OC_HUMAN_DATASET_INFO
self.subset = subset
super().__init__(
ann_file,
img_prefix,
data_cfg,
            coco_style=not bool(subset),  # COCO style only when no subset is given
dataset_info=dataset_info,
test_mode=test_mode)
def _get_db(self):
"""Load dataset."""
# ground truth bbox
if self.subset:
gt_db = self._load_keypoint_annotations()
else:
gt_db = super()._load_keypoint_annotations()
return gt_db
def _load_keypoint_annotations(self):
self._load_annofile()
gt_db = list()
for img_id in self.imgIds:
gt_db.extend(self._oc_load_keypoint_annotation_kernel(img_id))
return gt_db
    def _load_annofile(self):
        with open(self.ann_file, 'r') as f:
            self.human = json.load(f)
self.keypoint_names = self.human['keypoint_names']
self.keypoint_visible = self.human['keypoint_visible']
self.images = {}
self.imgIds = []
for imgItem in self.human['images']:
annos = [
anno for anno in imgItem['annotations'] if anno['keypoints']
]
imgItem['annotations'] = annos
self.imgIds.append(imgItem['image_id'])
self.images[imgItem['image_id']] = imgItem
        assert len(self.imgIds) > 0, \
            f'no valid annotations found in {self.ann_file}'
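        # Non-COCO style: deterministically split the image list 75%/25%
        # into train and val/test portions by its original order.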
if self.subset == 'train':
self.imgIds = self.imgIds[:int(len(self.imgIds) * 0.75)]
else:
self.imgIds = self.imgIds[int(len(self.imgIds) * 0.75):]
self.num_images = len(self.imgIds)
def _oc_load_keypoint_annotation_kernel(self, img_id,
maxIouRange=(0., 1.)):
"""load annotation from OCHumanAPI.
Note:
bbox:[x1, y1, w, h]
Args:
img_id: coco image id
Returns:
dict: db entry
"""
data = self.images[img_id]
file_name = data['file_name']
width = data['width']
height = data['height']
num_joints = self.ann_info['num_joints']
bbox_id = 0
rec = []
        for anno in data['annotations']:
kpt = anno['keypoints']
max_iou = anno['max_iou']
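            # ``max_iou`` is the instance's maximum box IoU with the other
            # persons in the image; use it to filter by occlusion level.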
if max_iou < maxIouRange[0] or max_iou >= maxIouRange[1]:
continue
            # OCHuman bbox [x1, y1, x2, y2] -> COCO-style [x, y, w, h]
x1, y1, x2, y2 = anno['bbox']
x, y, w, h = [x1, y1, x2 - x1, y2 - y1]
area = (x2 - x1) * (y2 - y1)
x1 = max(0, x)
y1 = max(0, y)
x2 = min(width - 1, x1 + max(0, w - 1))
y2 = min(height - 1, y1 + max(0, h - 1))
if area > 0 and x2 > x1 and y2 > y1:
bbox = [x1, y1, x2 - x1, y2 - y1]
                # COCO visibility: 2 visible, 1 labeled but occluded, 0 missing.
                # OCHuman 'keypoint_visible': {'missing': 0, 'vis': 1,
                #                              'self_occluded': 2, 'others_occluded': 3}
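                # Reorder keypoints from the file's declared order
                # (``keypoint_names``) into the fixed COCO order below.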
kptDef = self.human['keypoint_names']
kptDefCoco = [
'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
'left_wrist', 'right_wrist', 'left_hip', 'right_hip',
'left_knee', 'right_knee', 'left_ankle', 'right_ankle'
]
kptCoco = []
num_keypoints = 0
for i in range(len(kptDefCoco)):
idx = kptDef.index(kptDefCoco[i])
x, y, v = kpt[idx * 3:idx * 3 + 3]
if v == 1 or v == 2:
v = 2
num_keypoints += 1
elif v == 3:
v = 1
num_keypoints += 1
kptCoco += [x, y, v]
assert len(kptCoco) == 17 * 3
joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
keypoints = np.array(kptCoco).reshape(-1, 3)
joints_3d[:, :2] = keypoints[:, :2]
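                # Collapse COCO visibility {0, 1, 2} into a {0, 1} mask:
                # any labeled keypoint counts as supervised downstream.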
joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
center, scale = super()._xywh2cs(*bbox)
# image path
image_file = os.path.join(self.img_prefix, file_name)
rec.append({
'image_file': image_file,
'image_id': img_id,
'center': center,
'scale': scale,
'bbox': bbox,
'rotation': 0,
'joints_3d': joints_3d,
'joints_3d_visible': joints_3d_visible,
'dataset': self.dataset_name,
'bbox_score': 1,
'bbox_id': bbox_id
})
bbox_id = bbox_id + 1
return rec