-
Notifications
You must be signed in to change notification settings - Fork 45.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10537 from srihari-humbarwadi:panoptic-deeplab
PiperOrigin-RevId: 452568716
- Loading branch information
Showing
18 changed files
with
3,308 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
346 changes: 346 additions & 0 deletions
346
official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,346 @@ | ||
# Copyright 2022 The TensorFlow Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Panoptic Deeplab configuration definition.""" | ||
import dataclasses | ||
import os | ||
from typing import List, Optional, Union | ||
|
||
import numpy as np | ||
|
||
from official.core import config_definitions as cfg | ||
from official.core import exp_factory | ||
from official.modeling import hyperparams | ||
from official.modeling import optimization | ||
from official.vision.configs import common | ||
from official.vision.configs import decoders | ||
from official.vision.configs.google import backbones | ||
|
||
|
||
_COCO_INPUT_PATH_BASE = 'coco/tfrecords' | ||
_COCO_TRAIN_EXAMPLES = 118287 | ||
_COCO_VAL_EXAMPLES = 5000 | ||
|
||
|
||
@dataclasses.dataclass | ||
class Parser(hyperparams.Config): | ||
"""Panoptic deeplab parser.""" | ||
ignore_label: int = 0 | ||
# If resize_eval_groundtruth is set to False, original image sizes are used | ||
# for eval. In that case, groundtruth_padded_size has to be specified too to | ||
# allow for batching the variable input sizes of images. | ||
resize_eval_groundtruth: bool = True | ||
groundtruth_padded_size: List[int] = dataclasses.field(default_factory=list) | ||
aug_scale_min: float = 1.0 | ||
aug_scale_max: float = 1.0 | ||
aug_rand_hflip: bool = True | ||
aug_type: common.Augmentation = common.Augmentation() | ||
sigma: float = 8.0 | ||
small_instance_area_threshold: int = 4096 | ||
small_instance_weight: float = 3.0 | ||
dtype = 'float32' | ||
|
||
|
||
@dataclasses.dataclass | ||
class TfExampleDecoder(common.TfExampleDecoder): | ||
"""A simple TF Example decoder config.""" | ||
panoptic_category_mask_key: str = 'image/panoptic/category_mask' | ||
panoptic_instance_mask_key: str = 'image/panoptic/instance_mask' | ||
|
||
|
||
@dataclasses.dataclass | ||
class DataDecoder(common.DataDecoder): | ||
"""Data decoder config.""" | ||
simple_decoder: TfExampleDecoder = TfExampleDecoder() | ||
|
||
|
||
@dataclasses.dataclass | ||
class DataConfig(cfg.DataConfig): | ||
"""Input config for training.""" | ||
decoder: DataDecoder = DataDecoder() | ||
parser: Parser = Parser() | ||
input_path: str = '' | ||
drop_remainder: bool = True | ||
file_type: str = 'tfrecord' | ||
is_training: bool = True | ||
global_batch_size: int = 1 | ||
|
||
|
||
@dataclasses.dataclass | ||
class PanopticDeeplabHead(hyperparams.Config): | ||
"""Panoptic Deeplab head config.""" | ||
level: int = 3 | ||
num_convs: int = 2 | ||
num_filters: int = 256 | ||
kernel_size: int = 5 | ||
use_depthwise_convolution: bool = False | ||
upsample_factor: int = 1 | ||
low_level: List[int] = dataclasses.field(default_factory=lambda: [3, 2]) | ||
low_level_num_filters: List[int] = dataclasses.field( | ||
default_factory=lambda: [64, 32]) | ||
fusion_num_output_filters: int = 256 | ||
|
||
|
||
@dataclasses.dataclass | ||
class SemanticHead(PanopticDeeplabHead): | ||
"""Semantic head config.""" | ||
prediction_kernel_size: int = 1 | ||
|
||
|
||
@dataclasses.dataclass | ||
class InstanceHead(PanopticDeeplabHead): | ||
"""Instance head config.""" | ||
prediction_kernel_size: int = 1 | ||
|
||
|
||
@dataclasses.dataclass | ||
class PanopticDeeplabPostProcessor(hyperparams.Config): | ||
"""Panoptic Deeplab PostProcessing config.""" | ||
output_size: List[int] = dataclasses.field( | ||
default_factory=list) | ||
center_score_threshold: float = 0.1 | ||
thing_class_ids: List[int] = dataclasses.field(default_factory=list) | ||
label_divisor: int = 256 * 256 * 256 | ||
stuff_area_limit: int = 4096 | ||
ignore_label: int = 0 | ||
nms_kernel: int = 7 | ||
keep_k_centers: int = 200 | ||
rescale_predictions: bool = True | ||
|
||
|
||
@dataclasses.dataclass | ||
class PanopticDeeplab(hyperparams.Config): | ||
"""Panoptic Deeplab model config.""" | ||
num_classes: int = 2 | ||
input_size: List[int] = dataclasses.field(default_factory=list) | ||
min_level: int = 3 | ||
max_level: int = 6 | ||
norm_activation: common.NormActivation = common.NormActivation() | ||
backbone: backbones.Backbone = backbones.Backbone( | ||
type='resnet', resnet=backbones.ResNet()) | ||
decoder: decoders.Decoder = decoders.Decoder(type='aspp') | ||
semantic_head: SemanticHead = SemanticHead() | ||
instance_head: InstanceHead = InstanceHead() | ||
shared_decoder: bool = False | ||
generate_panoptic_masks: bool = True | ||
post_processor: PanopticDeeplabPostProcessor = PanopticDeeplabPostProcessor() | ||
|
||
|
||
@dataclasses.dataclass | ||
class Losses(hyperparams.Config): | ||
label_smoothing: float = 0.0 | ||
ignore_label: int = 0 | ||
class_weights: List[float] = dataclasses.field(default_factory=list) | ||
l2_weight_decay: float = 1e-4 | ||
top_k_percent_pixels: float = 0.15 | ||
segmentation_loss_weight: float = 1.0 | ||
center_heatmap_loss_weight: float = 200 | ||
center_offset_loss_weight: float = 0.01 | ||
|
||
|
||
@dataclasses.dataclass | ||
class Evaluation(hyperparams.Config): | ||
"""Evaluation config.""" | ||
ignored_label: int = 0 | ||
max_instances_per_category: int = 256 | ||
offset: int = 256 * 256 * 256 | ||
is_thing: List[float] = dataclasses.field( | ||
default_factory=list) | ||
rescale_predictions: bool = True | ||
report_per_class_pq: bool = False | ||
|
||
report_per_class_iou: bool = False | ||
report_train_mean_iou: bool = True # Turning this off can speed up training. | ||
|
||
|
||
@dataclasses.dataclass | ||
class PanopticDeeplabTask(cfg.TaskConfig): | ||
"""Panoptic deeplab task config.""" | ||
model: PanopticDeeplab = PanopticDeeplab() | ||
train_data: DataConfig = DataConfig(is_training=True) | ||
validation_data: DataConfig = DataConfig( | ||
is_training=False, | ||
drop_remainder=False) | ||
losses: Losses = Losses() | ||
init_checkpoint: Optional[str] = None | ||
init_checkpoint_modules: Union[ | ||
str, List[str]] = 'all' # all, backbone, and/or decoder | ||
evaluation: Evaluation = Evaluation() | ||
|
||
|
||
@exp_factory.register_config_factory('panoptic_deeplab_resnet_coco') | ||
def panoptic_deeplab_coco() -> cfg.ExperimentConfig: | ||
"""COCO panoptic segmentation with Panoptic Deeplab.""" | ||
train_steps = 200000 | ||
train_batch_size = 64 | ||
eval_batch_size = 1 | ||
steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size | ||
validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size | ||
|
||
num_panoptic_categories = 201 | ||
num_thing_categories = 91 | ||
ignore_label = 0 | ||
|
||
is_thing = [False] | ||
for idx in range(1, num_panoptic_categories): | ||
is_thing.append(True if idx <= num_thing_categories else False) | ||
|
||
input_size = [640, 640, 3] | ||
output_stride = 16 | ||
aspp_dilation_rates = [6, 12, 18] | ||
multigrid = [1, 2, 4] | ||
stem_type = 'v1' | ||
level = int(np.math.log2(output_stride)) | ||
|
||
config = cfg.ExperimentConfig( | ||
runtime=cfg.RuntimeConfig( | ||
mixed_precision_dtype='bfloat16', enable_xla=True), | ||
task=PanopticDeeplabTask( | ||
init_checkpoint='gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800', # pylint: disable=line-too-long | ||
init_checkpoint_modules=['backbone'], | ||
model=PanopticDeeplab( | ||
num_classes=num_panoptic_categories, | ||
input_size=input_size, | ||
backbone=backbones.Backbone( | ||
type='dilated_resnet', dilated_resnet=backbones.DilatedResNet( | ||
model_id=50, | ||
stem_type=stem_type, | ||
output_stride=output_stride, | ||
multigrid=multigrid, | ||
se_ratio=0.25, | ||
last_stage_repeats=1, | ||
stochastic_depth_drop_rate=0.2)), | ||
decoder=decoders.Decoder( | ||
type='aspp', | ||
aspp=decoders.ASPP( | ||
level=level, | ||
num_filters=256, | ||
pool_kernel_size=input_size[:2], | ||
dilation_rates=aspp_dilation_rates, | ||
use_depthwise_convolution=True, | ||
dropout_rate=0.1)), | ||
semantic_head=SemanticHead( | ||
level=level, | ||
num_convs=1, | ||
num_filters=256, | ||
kernel_size=5, | ||
use_depthwise_convolution=True, | ||
upsample_factor=1, | ||
low_level=[3, 2], | ||
low_level_num_filters=[64, 32], | ||
fusion_num_output_filters=256, | ||
prediction_kernel_size=1), | ||
instance_head=InstanceHead( | ||
level=level, | ||
num_convs=1, | ||
num_filters=32, | ||
kernel_size=5, | ||
use_depthwise_convolution=True, | ||
upsample_factor=1, | ||
low_level=[3, 2], | ||
low_level_num_filters=[32, 16], | ||
fusion_num_output_filters=128, | ||
prediction_kernel_size=1), | ||
shared_decoder=False, | ||
generate_panoptic_masks=True, | ||
post_processor=PanopticDeeplabPostProcessor( | ||
output_size=input_size[:2], | ||
center_score_threshold=0.1, | ||
thing_class_ids=list(range(1, num_thing_categories)), | ||
label_divisor=256, | ||
stuff_area_limit=4096, | ||
ignore_label=ignore_label, | ||
nms_kernel=41, | ||
keep_k_centers=200, | ||
rescale_predictions=True)), | ||
losses=Losses( | ||
label_smoothing=0.0, | ||
ignore_label=ignore_label, | ||
l2_weight_decay=0.0, | ||
top_k_percent_pixels=0.2, | ||
segmentation_loss_weight=1.0, | ||
center_heatmap_loss_weight=200, | ||
center_offset_loss_weight=0.01), | ||
train_data=DataConfig( | ||
input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'), | ||
is_training=True, | ||
global_batch_size=train_batch_size, | ||
parser=Parser( | ||
aug_scale_min=0.5, | ||
aug_scale_max=1.5, | ||
aug_rand_hflip=True, | ||
aug_type=common.Augmentation( | ||
type='autoaug', | ||
autoaug=common.AutoAugment( | ||
augmentation_name='panoptic_deeplab_policy')), | ||
sigma=8.0, | ||
small_instance_area_threshold=4096, | ||
small_instance_weight=3.0)), | ||
validation_data=DataConfig( | ||
input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'), | ||
is_training=False, | ||
global_batch_size=eval_batch_size, | ||
parser=Parser( | ||
resize_eval_groundtruth=False, | ||
groundtruth_padded_size=[640, 640], | ||
aug_scale_min=1.0, | ||
aug_scale_max=1.0, | ||
aug_rand_hflip=False, | ||
aug_type=None, | ||
sigma=8.0, | ||
small_instance_area_threshold=4096, | ||
small_instance_weight=3.0), | ||
drop_remainder=False), | ||
evaluation=Evaluation( | ||
ignored_label=ignore_label, | ||
max_instances_per_category=256, | ||
offset=256*256*256, | ||
is_thing=is_thing, | ||
rescale_predictions=True, | ||
report_per_class_pq=False, | ||
report_per_class_iou=False, | ||
report_train_mean_iou=False)), | ||
trainer=cfg.TrainerConfig( | ||
train_steps=train_steps, | ||
validation_steps=validation_steps, | ||
validation_interval=steps_per_epoch, | ||
steps_per_loop=steps_per_epoch, | ||
summary_interval=steps_per_epoch, | ||
checkpoint_interval=steps_per_epoch, | ||
optimizer_config=optimization.OptimizationConfig({ | ||
'optimizer': { | ||
'type': 'adam', | ||
}, | ||
'learning_rate': { | ||
'type': 'polynomial', | ||
'polynomial': { | ||
'initial_learning_rate': 0.0005, | ||
'decay_steps': train_steps, | ||
'end_learning_rate': 0.0, | ||
'power': 0.9 | ||
} | ||
}, | ||
'warmup': { | ||
'type': 'linear', | ||
'linear': { | ||
'warmup_steps': 2000, | ||
'warmup_learning_rate': 0 | ||
} | ||
} | ||
})), | ||
restrictions=[ | ||
'task.train_data.is_training != None', | ||
'task.validation_data.is_training != None' | ||
]) | ||
return config |
Oops, something went wrong.