Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| from detectron2.config import CfgNode as CN | |
def add_cat_seg_config(cfg):
    """
    Extend ``cfg`` in place with the additional config keys set below.

    Keys are attached under ``cfg.INPUT``, ``cfg.DATASETS``, ``cfg.SOLVER``,
    ``cfg.MODEL`` and ``cfg.TEST``. Those nodes, together with
    ``cfg.INPUT.CROP`` and ``cfg.MODEL.SEM_SEG_HEAD``, are read here and must
    therefore already exist on ``cfg``. ``cfg.MODEL.MASK_FORMER`` and
    ``cfg.MODEL.SWIN`` are created as new ``CN`` nodes. Nothing is returned.
    """
    # ---- data / input ------------------------------------------------------
    input_node = cfg.INPUT
    # Name of the dataset mapper to select.
    input_node.DATASET_MAPPER_NAME = "mask_former_semantic"
    cfg.DATASETS.VAL_ALL = ("coco_2017_val_all_stuff_sem_seg",)
    # Colour augmentation switch.
    input_node.COLOR_AUG_SSD = False
    # Random cropping is retried until no single category in the semantic
    # segmentation GT covers more than this fraction of the crop.
    input_node.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0
    # Padding of image and segmentation GT in the dataset mapper.
    input_node.SIZE_DIVISIBILITY = -1

    # ---- solver ------------------------------------------------------------
    solver_node = cfg.SOLVER
    # Weight decay applied to embeddings.
    solver_node.WEIGHT_DECAY_EMBED = 0.0
    # Optimizer selection and backbone multiplier.
    solver_node.OPTIMIZER = "ADAMW"
    solver_node.BACKBONE_MULTIPLIER = 0.1

    # ---- mask_former model node --------------------------------------------
    model_node = cfg.MODEL
    model_node.MASK_FORMER = CN()
    # `backbone.size_divisibility` is sometimes 0 for some backbones
    # (e.g. ResNet); this key can be used to override it.
    model_node.MASK_FORMER.SIZE_DIVISIBILITY = 32

    # ---- swin transformer backbone -----------------------------------------
    model_node.SWIN = CN()
    swin_node = model_node.SWIN
    swin_node.PRETRAIN_IMG_SIZE = 224
    swin_node.PATCH_SIZE = 4
    swin_node.EMBED_DIM = 96
    swin_node.DEPTHS = [2, 2, 6, 2]
    swin_node.NUM_HEADS = [3, 6, 12, 24]
    swin_node.WINDOW_SIZE = 7
    swin_node.MLP_RATIO = 4.0
    swin_node.QKV_BIAS = True
    swin_node.QK_SCALE = None
    swin_node.DROP_RATE = 0.0
    swin_node.ATTN_DROP_RATE = 0.0
    swin_node.DROP_PATH_RATE = 0.3
    swin_node.APE = False
    swin_node.PATCH_NORM = True
    swin_node.OUT_FEATURES = ["res2", "res3", "res4", "res5"]

    # ---- zero shot config --------------------------------------------------
    head_node = model_node.SEM_SEG_HEAD
    # Class-name JSON files and seen/unseen class index files.
    head_node.TRAIN_CLASS_JSON = "datasets/ADE20K_2021_17_01/ADE20K_847.json"
    head_node.TEST_CLASS_JSON = "datasets/ADE20K_2021_17_01/ADE20K_847.json"
    head_node.TRAIN_CLASS_INDEXES = "datasets/coco/coco_stuff/split/seen_indexes.json"
    head_node.TEST_CLASS_INDEXES = "datasets/coco/coco_stuff/split/unseen_indexes.json"

    head_node.CLIP_PRETRAINED = "ViT-B/16"

    model_node.PROMPT_ENSEMBLE = False
    model_node.PROMPT_ENSEMBLE_TYPE = "single"

    # Per-channel pixel statistics stored under the CLIP_* keys.
    model_node.CLIP_PIXEL_MEAN = [122.7709383, 116.7460125, 104.09373615]
    model_node.CLIP_PIXEL_STD = [68.5005327, 66.6321579, 70.3231630]

    # ---- segmentation head hyper-parameters --------------------------------
    # Dimension, layer-count and window settings; their consumers are not
    # visible in this file.
    head_node.TEXT_AFFINITY_DIM = 512
    head_node.TEXT_AFFINITY_PROJ_DIM = 128
    head_node.APPEARANCE_AFFINITY_DIM = 512
    head_node.APPEARANCE_AFFINITY_PROJ_DIM = 128

    head_node.DECODER_DIMS = [64, 32]
    head_node.DECODER_AFFINITY_DIMS = [256, 128]
    head_node.DECODER_AFFINITY_PROJ_DIMS = [32, 16]

    head_node.NUM_LAYERS = 4
    head_node.NUM_HEADS = 4
    head_node.HIDDEN_DIMS = 128
    head_node.POOLING_SIZES = [6, 6]
    head_node.FEATURE_RESOLUTION = [24, 24]
    head_node.WINDOW_SIZES = 12
    head_node.ATTENTION_TYPE = "linear"

    head_node.PROMPT_DEPTH = 0
    head_node.PROMPT_LENGTH = 0
    solver_node.CLIP_MULTIPLIER = 0.01
    head_node.CLIP_FINETUNE = "attention"

    # ---- test --------------------------------------------------------------
    cfg.TEST.SLIDING_WINDOW = False