# Source code for zea.models.deeplabv3

"""DeepLabV3+ architecture for multi-class segmentation. For more details see https://arxiv.org/abs/1802.02611."""

import keras
from keras import layers, ops


def convolution_block(
    block_input,
    num_filters=256,
    kernel_size=3,
    dilation_rate=1,
    use_bias=False,
):
    """Apply a Conv2D -> BatchNormalization -> ReLU stack to a tensor.

    The convolution uses ``"same"`` padding and a He-normal kernel initializer.
    This is the shared building block for the other functions in this module.

    Args:
        block_input (Tensor): Tensor fed into the convolution.
        num_filters (int): Number of convolution filters (output channels).
            Defaults to 256.
        kernel_size (int): Size of the convolution kernel. Defaults to 3.
        dilation_rate (int): Dilation rate of the convolution. Defaults to 1.
        use_bias (bool): Whether the convolution layer has a bias term.
            Defaults to False.

    Returns:
        Tensor: Result of the convolution after batch normalization and ReLU.
    """
    conv_layer = layers.Conv2D(
        filters=num_filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
        padding="same",
        use_bias=use_bias,
        kernel_initializer=keras.initializers.HeNormal(),
    )
    features = conv_layer(block_input)

    batch_norm = layers.BatchNormalization()
    normalized = batch_norm(features)

    return ops.nn.relu(normalized)
def DilatedSpatialPyramidPooling(dspp_input):
    """Build the Atrous Spatial Pyramid Pooling (ASPP) head on a feature map.

    Five parallel branches are computed from the same input and concatenated
    along the last axis:

    - an image-level branch: average pooling over the full spatial extent,
      a 1x1 convolution block (with bias), and bilinear upsampling back to
      the input's spatial size
    - a 1x1 convolution block
    - three 3x3 convolution blocks with dilation rates 6, 12 and 18

    The concatenated result is fused by a final 1x1 convolution block.

    Reference: https://arxiv.org/abs/1706.05587

    Args:
        dspp_input (Tensor): Feature tensor to process. The spatial sizes are
            read from ``shape[-3]`` and ``shape[-2]`` and used in integer
            arithmetic, so they need to be statically known (presumably a
            channels-last ``(batch, height, width, channels)`` tensor --
            confirm against callers).

    Returns:
        Tensor: Fused multi-scale feature tensor.
    """
    height, width = dspp_input.shape[-3], dspp_input.shape[-2]

    # Image-level branch: pool everything down, project, then stretch back.
    pooled = layers.AveragePooling2D(pool_size=(height, width))(dspp_input)
    pooled = convolution_block(pooled, kernel_size=1, use_bias=True)
    upsample_factor = (height // pooled.shape[1], width // pooled.shape[2])
    out_pool = layers.UpSampling2D(size=upsample_factor, interpolation="bilinear")(pooled)

    # Pointwise branch followed by the dilated 3x3 branches, in increasing rate.
    out_1 = convolution_block(dspp_input, kernel_size=1, dilation_rate=1)
    dilated_branches = [
        convolution_block(dspp_input, kernel_size=3, dilation_rate=rate) for rate in (6, 12, 18)
    ]

    merged = layers.Concatenate(axis=-1)([out_pool, out_1, *dilated_branches])
    return convolution_block(merged, kernel_size=1)
def DeeplabV3Plus(image_shape, num_classes, pretrained_weights=None):
    """Assemble a DeepLabV3+ segmentation model on top of a ResNet50 backbone.

    Pipeline:
        1. The input is repeated three times along the last axis and passed
           through the ResNet50 ``preprocess_input`` function.
        2. Encoder: ``keras.applications.ResNet50`` without its top; features
           are taken from the ``conv4_block6_2_relu`` layer.
        3. ASPP: ``DilatedSpatialPyramidPooling`` on the encoder features,
           then bilinear upsampling towards 1/4 of the input resolution.
        4. Decoder: the ``conv2_block3_2_relu`` features are projected to 48
           channels with a 1x1 convolution block, concatenated with the
           upsampled ASPP output, and refined by two convolution blocks.
        5. Output: bilinear upsampling towards the input resolution and a
           1x1 convolution with ``num_classes`` filters. No activation is
           specified on this last layer.

    Reference: https://arxiv.org/abs/1802.02611

    Args:
        image_shape (tuple): Input image shape as (height, width, channels).
            NOTE(review): the channel replication suggests single-channel
            (grayscale) input is expected -- confirm against callers.
        num_classes (int): Number of output channels of the final convolution,
            i.e. the number of segmentation classes.
        pretrained_weights (str, optional): Forwarded as the ``weights``
            argument of ``keras.applications.ResNet50``. Defaults to None.

    Returns:
        keras.Model: Model mapping the input image to the segmentation output.
    """
    model_input = keras.Input(shape=image_shape)
    height, width = image_shape[0], image_shape[1]

    # Repeat the input three times along the channel axis before handing it
    # to the ResNet50 preprocessing and backbone.
    replicated = ops.concatenate([model_input] * 3, axis=-1)
    backbone_input = keras.applications.resnet50.preprocess_input(replicated)
    backbone = keras.applications.ResNet50(
        weights=pretrained_weights,
        include_top=False,
        input_tensor=backbone_input,
    )

    # High-level path: encoder features -> ASPP -> upsample by an integer factor.
    encoded = DilatedSpatialPyramidPooling(backbone.get_layer("conv4_block6_2_relu").output)
    aspp_scale = (height // 4 // encoded.shape[1], width // 4 // encoded.shape[2])
    input_a = layers.UpSampling2D(size=aspp_scale, interpolation="bilinear")(encoded)

    # Low-level path: early backbone features reduced to 48 channels.
    low_level = backbone.get_layer("conv2_block3_2_relu").output
    input_b = convolution_block(low_level, num_filters=48, kernel_size=1)

    # Decoder: merge both paths and refine with two default convolution blocks.
    decoded = layers.Concatenate(axis=-1)([input_a, input_b])
    for _ in range(2):
        decoded = convolution_block(decoded)

    # Upsample by the remaining integer factor and project to class channels.
    final_scale = (height // decoded.shape[1], width // decoded.shape[2])
    decoded = layers.UpSampling2D(size=final_scale, interpolation="bilinear")(decoded)
    model_output = layers.Conv2D(num_classes, kernel_size=(1, 1), padding="same")(decoded)

    return keras.Model(inputs=model_input, outputs=model_output)