# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| """Provides data from segmentation datasets. | |
| Currently, we support the following datasets: | |
| 1. Cityscapes dataset (https://www.cityscapes-dataset.com). | |
| The Cityscapes dataset contains 19 semantic labels (such as road, person, car, | |
| and so on) for urban street scenes. | |
| 2. KITTI-STEP (http://www.cvlibs.net/datasets/kitti/). | |
| The KITTI-STEP enriches the KITTI-MOTS data with additional `stuff' | |
| anntotations. | |
| 3. MOTChallenge-STEP (https://motchallenge.net/). | |
| The MOTChallenge-STEP enriches the MOTSChallenge data with additional `stuff' | |
| annotations. | |
| 4. MSCOCO panoptic segmentation (http://cocodataset.org/#panoptic-2018). | |
| Panoptic segmentation annotations for MSCOCO dataset. Note that we convert the | |
| provided MSCOCO panoptic segmentation format to the following one: | |
| panoptic label = semantic label * 256 + instance id. | |
| 5. Cityscapes-DVPS (https://github.com/joe-siyuan-qiao/ViP-DeepLab) | |
| The Cityscapes-DVPS dataset augments Cityscapes-VPS | |
| (https://github.com/mcahny/vps) with depth annotations. | |
| References: | |
| - Marius Cordts, Mohamed Omran, Sebastian Ramos, Timo Rehfeld, Markus | |
| Enzweiler, Rodrigo Benenson, Uwe Franke, Stefan Roth, and Bernt Schiele, "The | |
| Cityscapes Dataset for Semantic Urban Scene Understanding." In CVPR, 2016. | |
| - Andreas Geiger and Philip Lenz and Raquel Urtasun, "Are we ready for | |
| Autonomous Driving? The KITTI Vision Benchmark Suite." In CVPR, 2012. | |
| - Alexander Kirillov, Kaiming He, Ross Girshick, Carsten Rother, and Piotr | |
| Dollar, "Panoptic Segmentation." In CVPR, 2019. | |
| - Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. | |
| Girshick, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollar, and C. | |
| Lawrence Zitnick, "Microsoft COCO: common objects in context." In ECCV, 2014. | |
| - Anton Milan, Laura Leal-Taixe, Ian Reid, Stefan Roth, and Konrad Schindler, | |
| "Mot16: A benchmark for multi-object tracking." arXiv:1603.00831, 2016. | |
| - Paul Voigtlaender, Michael Krause, Aljosa Osep, Jonathon Luiten, Berin | |
| Balachandar Gnana Sekar, Andreas Geiger, and Bastian Leibe. "MOTS: | |
| Multi-object tracking and segmentation." In CVPR, 2019 | |
| - Mark Weber, Jun Xie, Maxwell Collins, Yukun Zhu, Paul Voigtlaender, Hartwig | |
| Adam, Bradley Green, Andreas Geiger, Bastian Leibe, Daniel Cremers, Aljosa | |
| Osep, Laura Leal-Taixe, and Liang-Chieh Chen, "STEP: Segmenting and Tracking | |
| Every Pixel." arXiv: 2102.11859, 2021. | |
| - Dahun Kim, Sanghyun Woo, Joon-Young Lee, and In So Kweon. "Video panoptic | |
| segmentation." In CVPR, 2020. | |
| - Siyuan Qiao, Yukun Zhu, Hartwig Adam, Alan Yuille, and Liang-Chieh Chen. | |
| "ViP-DeepLab: Learning Visual Perception with Depth-aware Video Panoptic | |
| Segmentation." In CVPR, 2021. | |
| """ | |
import collections

# Dataset names: keys used to select a DatasetDescriptor at configuration
# time (see MAP_NAME_TO_DATASET_INFO below).
_CITYSCAPES = 'cityscapes'
_CITYSCAPES_PANOPTIC = 'cityscapes_panoptic'
_KITTI_STEP = 'kitti_step'
_MOTCHALLENGE_STEP = 'motchallenge_step'
_CITYSCAPES_DVPS = 'cityscapes_dvps'
_COCO_PANOPTIC = 'coco_panoptic'

# Colormap names: identify the color palette used when visualizing
# predictions for each dataset family.
_CITYSCAPES_COLORMAP = 'cityscapes'
_MOTCHALLENGE_COLORMAP = 'motchallenge'
_COCO_COLORMAP = 'coco'
# Named tuple to describe dataset properties.
DatasetDescriptor = collections.namedtuple(
    'DatasetDescriptor', [
        'dataset_name',  # Dataset name.
        'splits_to_sizes',  # Splits of the dataset into training, val and test.
        'num_classes',  # Number of semantic classes.
        'ignore_label',  # Ignore label value used for semantic segmentation.

        # Fields below are used for panoptic segmentation and will be None for
        # Semantic segmentation datasets.
        # Label divisor only used in panoptic segmentation annotation to infer
        # semantic label and instance id.
        'panoptic_label_divisor',
        # A tuple of classes that contains instance annotations. For example,
        # 'person' class has instance annotations while 'sky' does not.
        'class_has_instances_list',
        # A flag indicating whether the dataset is a video dataset that contains
        # sequence IDs and frame IDs.
        'is_video_dataset',
        # A string specifying the colormap that should be used for
        # visualization. E.g. 'cityscapes'.
        'colormap',
        # A flag indicating whether the dataset contains depth annotation.
        'is_depth_dataset',
    ]
)
# Cityscapes, semantic-segmentation-only variant: the panoptic fields
# (panoptic_label_divisor, class_has_instances_list) are None.
CITYSCAPES_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES,
    splits_to_sizes={'train_fine': 2975,
                     'train_coarse': 22973,
                     'trainval_fine': 3475,
                     'trainval_coarse': 23473,
                     'val_fine': 500,
                     'test_fine': 1525},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=None,
    class_has_instances_list=None,
    is_video_dataset=False,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)
# Cityscapes with panoptic annotations; classes 11-18 ('person' through
# 'bicycle' in the standard Cityscapes trainId ordering) carry instance IDs.
CITYSCAPES_PANOPTIC_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES_PANOPTIC,
    splits_to_sizes={'train_fine': 2975,
                     'val_fine': 500,
                     'trainval_fine': 3475,
                     'test_fine': 1525},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=tuple(range(11, 19)),
    is_video_dataset=False,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)
# KITTI-STEP video panoptic dataset; only classes 11 and 13 ('person' and
# 'car' in the Cityscapes trainId ordering) have instance annotations.
KITTI_STEP_INFORMATION = DatasetDescriptor(
    dataset_name=_KITTI_STEP,
    splits_to_sizes={'train': 5027,
                     'val': 2981,
                     'test': 11095},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=(11, 13),
    is_video_dataset=True,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)
# MOTChallenge-STEP video panoptic dataset; only class 4 has instance
# annotations. The test split is empty (size 0).
MOTCHALLENGE_STEP_INFORMATION = DatasetDescriptor(
    dataset_name=_MOTCHALLENGE_STEP,
    splits_to_sizes={'train': 525,  # Sequence 9.
                     'val': 600,  # Sequence 2.
                     'test': 0},
    num_classes=7,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=(4,),
    is_video_dataset=True,
    colormap=_MOTCHALLENGE_COLORMAP,
    is_depth_dataset=True,
)
# Cityscapes-DVPS: Cityscapes-VPS augmented with depth annotations, hence
# the only descriptor here with is_depth_dataset=True.
CITYSCAPES_DVPS_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES_DVPS,
    # The numbers of images are 2400/300/300 for train/val/test. Here, the
    # sizes are the number of consecutive frame pairs. As each sequence has 6
    # frames, the number of pairs for the train split is 2400 / 6 * 5 = 2000.
    # Similarly, we get 250 pairs for the val split and the test split.
    splits_to_sizes={'train': 2000,
                     'val': 250,
                     'test': 250},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=tuple(range(11, 19)),
    is_video_dataset=True,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=True,
)
# MSCOCO panoptic segmentation. Note the COCO-specific conventions: label 0
# is the ignore/void label, the panoptic format uses
# semantic * 256 + instance (see module docstring), and the 80 "thing"
# categories occupy class IDs 1..80.
COCO_PANOPTIC_INFORMATION = DatasetDescriptor(
    dataset_name=_COCO_PANOPTIC,
    splits_to_sizes={'train': 118287,
                     'val': 5000,
                     'test': 40670},
    num_classes=134,
    ignore_label=0,
    panoptic_label_divisor=256,
    class_has_instances_list=tuple(range(1, 81)),
    is_video_dataset=False,
    colormap=_COCO_COLORMAP,
    is_depth_dataset=False,
)
# Registry mapping a dataset name string to its DatasetDescriptor; used to
# look up dataset properties from a config value.
MAP_NAME_TO_DATASET_INFO = {
    _CITYSCAPES: CITYSCAPES_INFORMATION,
    _CITYSCAPES_PANOPTIC: CITYSCAPES_PANOPTIC_INFORMATION,
    _KITTI_STEP: KITTI_STEP_INFORMATION,
    _MOTCHALLENGE_STEP: MOTCHALLENGE_STEP_INFORMATION,
    _CITYSCAPES_DVPS: CITYSCAPES_DVPS_INFORMATION,
    _COCO_PANOPTIC: COCO_PANOPTIC_INFORMATION,
}

# All supported dataset names, in registration order.
MAP_NAMES = list(MAP_NAME_TO_DATASET_INFO.keys())