# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| """Provides data from segmentation datasets. | |
| Currently, we support the following datasets: | |
| 1. Cityscapes dataset (https://www.cityscapes-dataset.com). | |
| The Cityscapes dataset contains 19 semantic labels (such as road, person, car, | |
| and so on) for urban street scenes. | |
| 2. KITTI-STEP (http://www.cvlibs.net/datasets/kitti/). | |
| The KITTI-STEP enriches the KITTI-MOTS data with additional `stuff' | |
| anntotations. | |
| 3. MOTChallenge-STEP (https://motchallenge.net/). | |
| The MOTChallenge-STEP enriches the MOTSChallenge data with additional `stuff' | |
| annotations. | |
| 4. MSCOCO panoptic segmentation (http://cocodataset.org/#panoptic-2018). | |
| Panoptic segmentation annotations for MSCOCO dataset. Note that we convert the | |
| provided MSCOCO panoptic segmentation format to the following one: | |
| panoptic label = semantic label * 256 + instance id. | |
| 5. Cityscapes-DVPS (https://github.com/joe-siyuan-qiao/ViP-DeepLab) | |
| The Cityscapes-DVPS dataset augments Cityscapes-VPS | |
| (https://github.com/mcahny/vps) with depth annotations. | |
| References: | |
| - Marius Cordts, Mohamed Omran, Sebastian Ramos, Timo Rehfeld, Markus | |
| Enzweiler, Rodrigo Benenson, Uwe Franke, Stefan Roth, and Bernt Schiele, "The | |
| Cityscapes Dataset for Semantic Urban Scene Understanding." In CVPR, 2016. | |
| - Andreas Geiger and Philip Lenz and Raquel Urtasun, "Are we ready for | |
| Autonomous Driving? The KITTI Vision Benchmark Suite." In CVPR, 2012. | |
| - Alexander Kirillov, Kaiming He, Ross Girshick, Carsten Rother, and Piotr | |
| Dollar, "Panoptic Segmentation." In CVPR, 2019. | |
| - Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. | |
| Girshick, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollar, and C. | |
| Lawrence Zitnick, "Microsoft COCO: common objects in context." In ECCV, 2014. | |
| - Anton Milan, Laura Leal-Taixe, Ian Reid, Stefan Roth, and Konrad Schindler, | |
| "Mot16: A benchmark for multi-object tracking." arXiv:1603.00831, 2016. | |
| - Paul Voigtlaender, Michael Krause, Aljosa Osep, Jonathon Luiten, Berin | |
| Balachandar Gnana Sekar, Andreas Geiger, and Bastian Leibe. "MOTS: | |
| Multi-object tracking and segmentation." In CVPR, 2019 | |
| - Mark Weber, Jun Xie, Maxwell Collins, Yukun Zhu, Paul Voigtlaender, Hartwig | |
| Adam, Bradley Green, Andreas Geiger, Bastian Leibe, Daniel Cremers, Aljosa | |
| Osep, Laura Leal-Taixe, and Liang-Chieh Chen, "STEP: Segmenting and Tracking | |
| Every Pixel." arXiv: 2102.11859, 2021. | |
| - Dahun Kim, Sanghyun Woo, Joon-Young Lee, and In So Kweon. "Video panoptic | |
| segmentation." In CVPR, 2020. | |
| - Siyuan Qiao, Yukun Zhu, Hartwig Adam, Alan Yuille, and Liang-Chieh Chen. | |
| "ViP-DeepLab: Learning Visual Perception with Depth-aware Video Panoptic | |
| Segmentation." In CVPR, 2021. | |
| """ | |
import collections

# Dataset names: keys used to select a DatasetDescriptor at configuration
# time (see MAP_NAME_TO_DATASET_INFO below).
_CITYSCAPES = 'cityscapes'
_CITYSCAPES_PANOPTIC = 'cityscapes_panoptic'
_KITTI_STEP = 'kitti_step'
_MOTCHALLENGE_STEP = 'motchallenge_step'
_CITYSCAPES_DVPS = 'cityscapes_dvps'
_COCO_PANOPTIC = 'coco_panoptic'

# Colormap names: identify the color palette used when visualizing
# predictions for each dataset family.
_CITYSCAPES_COLORMAP = 'cityscapes'
_MOTCHALLENGE_COLORMAP = 'motchallenge'
_COCO_COLORMAP = 'coco'
# Named tuple to describe dataset properties.
DatasetDescriptor = collections.namedtuple(
    'DatasetDescriptor', [
        'dataset_name',  # Dataset name.
        'splits_to_sizes',  # Splits of the dataset into training, val and test.
        'num_classes',  # Number of semantic classes.
        'ignore_label',  # Ignore label value used for semantic segmentation.

        # Fields below are used for panoptic segmentation and will be None for
        # Semantic segmentation datasets.
        # Label divisor only used in panoptic segmentation annotation to infer
        # semantic label and instance id.
        'panoptic_label_divisor',
        # A tuple of classes that contains instance annotations. For example,
        # 'person' class has instance annotations while 'sky' does not.
        'class_has_instances_list',
        # A flag indicating whether the dataset is a video dataset that contains
        # sequence IDs and frame IDs.
        'is_video_dataset',
        # A string specifying the colormap that should be used for
        # visualization. E.g. 'cityscapes'.
        'colormap',
        # A flag indicating whether the dataset contains depth annotation.
        'is_depth_dataset',
    ]
)
# Cityscapes, semantic-segmentation-only variant: the panoptic fields
# (panoptic_label_divisor, class_has_instances_list) are None.
CITYSCAPES_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES,
    splits_to_sizes={'train_fine': 2975,
                     'train_coarse': 22973,
                     'trainval_fine': 3475,
                     'trainval_coarse': 23473,
                     'val_fine': 500,
                     'test_fine': 1525},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=None,
    class_has_instances_list=None,
    is_video_dataset=False,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)
# Cityscapes with panoptic annotations; classes 11-18 ('person' through
# 'bicycle' in the standard Cityscapes trainId ordering) carry instance IDs.
CITYSCAPES_PANOPTIC_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES_PANOPTIC,
    splits_to_sizes={'train_fine': 2975,
                     'val_fine': 500,
                     'trainval_fine': 3475,
                     'test_fine': 1525},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=tuple(range(11, 19)),
    is_video_dataset=False,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)
# KITTI-STEP video panoptic dataset; only classes 11 and 13 ('person' and
# 'car' in the Cityscapes trainId ordering) have instance annotations.
KITTI_STEP_INFORMATION = DatasetDescriptor(
    dataset_name=_KITTI_STEP,
    splits_to_sizes={'train': 5027,
                     'val': 2981,
                     'test': 11095},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=(11, 13),
    is_video_dataset=True,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)
# MOTChallenge-STEP video panoptic dataset; only class 4 has instance
# annotations. The test split is empty (size 0).
MOTCHALLENGE_STEP_INFORMATION = DatasetDescriptor(
    dataset_name=_MOTCHALLENGE_STEP,
    splits_to_sizes={'train': 525,  # Sequence 9.
                     'val': 600,  # Sequence 2.
                     'test': 0},
    num_classes=7,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=(4,),
    is_video_dataset=True,
    colormap=_MOTCHALLENGE_COLORMAP,
    is_depth_dataset=True,
)
# Cityscapes-DVPS: Cityscapes-VPS augmented with depth annotations, hence
# the only descriptor here with is_depth_dataset=True.
CITYSCAPES_DVPS_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES_DVPS,
    # The numbers of images are 2400/300/300 for train/val/test. Here, the
    # sizes are the number of consecutive frame pairs. As each sequence has 6
    # frames, the number of pairs for the train split is 2400 / 6 * 5 = 2000.
    # Similarly, we get 250 pairs for the val split and the test split.
    splits_to_sizes={'train': 2000,
                     'val': 250,
                     'test': 250},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=tuple(range(11, 19)),
    is_video_dataset=True,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=True,
)
# MSCOCO panoptic segmentation. Note the COCO-specific conventions: label 0
# is the ignore/void label, the panoptic format uses
# semantic * 256 + instance (see module docstring), and the 80 "thing"
# categories occupy class IDs 1..80.
COCO_PANOPTIC_INFORMATION = DatasetDescriptor(
    dataset_name=_COCO_PANOPTIC,
    splits_to_sizes={'train': 118287,
                     'val': 5000,
                     'test': 40670},
    num_classes=134,
    ignore_label=0,
    panoptic_label_divisor=256,
    class_has_instances_list=tuple(range(1, 81)),
    is_video_dataset=False,
    colormap=_COCO_COLORMAP,
    is_depth_dataset=False,
)
# Registry mapping a dataset name string to its DatasetDescriptor; used to
# look up dataset properties from a config value.
MAP_NAME_TO_DATASET_INFO = {
    _CITYSCAPES: CITYSCAPES_INFORMATION,
    _CITYSCAPES_PANOPTIC: CITYSCAPES_PANOPTIC_INFORMATION,
    _KITTI_STEP: KITTI_STEP_INFORMATION,
    _MOTCHALLENGE_STEP: MOTCHALLENGE_STEP_INFORMATION,
    _CITYSCAPES_DVPS: CITYSCAPES_DVPS_INFORMATION,
    _COCO_PANOPTIC: COCO_PANOPTIC_INFORMATION,
}

# All supported dataset names, in registration order.
MAP_NAMES = list(MAP_NAME_TO_DATASET_INFO.keys())