# Source code for fl4health.model_bases.masked_layers.masked_conv

from __future__ import annotations

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn.common_types import _size_1_t, _size_2_t, _size_3_t
from torch.nn.modules.utils import _pair, _single, _triple
from torch.nn.parameter import Parameter

from fl4health.utils.functions import bernoulli_sample

class MaskedConv1d(nn.Conv1d):
    """1D convolution whose frozen weight and bias are gated by sampled binary masks."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_1_t,
        stride: _size_1_t = 1,
        padding: str | _size_1_t = 0,
        dilation: _size_1_t = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = "zeros",
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        """
        Implementation of masked Conv1d layers.

        Like regular Conv1d layers (i.e., ``nn.Conv1d`` module), a masked convolutional layer has a weight (i.e.,
        convolutional filter) and a (optional) bias. However, the weight and the bias do not receive gradient in back
        propagation. Instead, two score tensors - one for the weight and another for the bias - are maintained. In the
        forward pass, the score tensors are transformed by the Sigmoid function into probability scores, which are
        then used to produce binary masks via Bernoulli sampling. Finally, the binary masks are applied to the weight
        and the bias. During training, gradients with respect to the score tensors are computed and used to update the
        score tensors.

        **NOTE:** The scores are not assumed to be bounded between 0 and 1.

        Args:
            in_channels (int): Number of channels in the input image
            out_channels (int): Number of channels produced by the convolution
            kernel_size (int or tuple): Size of the convolving kernel
            stride (int or tuple, optional): Stride of the convolution. Default: 1
            padding (int, tuple or str, optional): Padding added to both sides of the input. Default: 0
            dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
            groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
            bias (bool, optional): If ``True``, adds a bias (and a matching score tensor) to the layer.
                Default: ``True``
            padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
                Default: ``'zeros'``
            device (torch.device | None, optional): Forwarded unchanged to the ``nn.Conv1d`` constructor.
                Default: None
            dtype (torch.dtype | None, optional): Forwarded unchanged to the ``nn.Conv1d`` constructor.
                Default: None
        """
        # Attributes:
        #   weight: weights of the module (frozen).
        #   bias: bias of the module (frozen), or None when ``bias=False``.
        #   weight_scores: learnable scores for the weights. Has the same shape as weight.
        #   bias_scores: learnable scores for the bias. Has the same shape as bias (None when there is no bias).
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            padding_mode,
            device,
            dtype,
        )
        # The filter itself is frozen; only the scores below are trainable.
        self.weight.requires_grad = False
        self.weight_scores = Parameter(torch.randn_like(self.weight), requires_grad=True)
        if bias:
            assert self.bias is not None
            self.bias.requires_grad = False
            self.bias_scores = Parameter(torch.randn_like(self.bias), requires_grad=True)
        else:
            # Register the name anyway so that ``self.bias_scores`` resolves to None instead of raising.
            self.register_parameter("bias_scores", None)

    def forward(self, input: Tensor) -> Tensor:
        """
        Forward for the masked 1D convolution.

        A fresh mask is drawn on every call: scores go through a sigmoid, the resulting probabilities are passed to
        ``bernoulli_sample`` and the returned mask multiplies the frozen weight (and the bias, when present).

        Args:
            input (Tensor): input tensor for the layer

        Returns:
            Tensor: output tensor for the convolution
        """
        weight_prob_scores = torch.sigmoid(self.weight_scores)
        weight_mask = bernoulli_sample(weight_prob_scores)
        masked_weight = weight_mask * self.weight
        if self.bias is not None:
            bias_prob_scores = torch.sigmoid(self.bias_scores)
            bias_mask = bernoulli_sample(bias_prob_scores)
            masked_bias = bias_mask * self.bias
        else:
            masked_bias = None
        return self._conv_forward(input, weight=masked_weight, bias=masked_bias)

    @classmethod
    def from_pretrained(cls, conv_module: nn.Conv1d) -> MaskedConv1d:
        """
        Return an instance of ``MaskedConv1d`` whose weight and bias have the same values as those of
        ``conv_module``.

        The weight and bias are detached copies, so the returned module does not share storage with
        ``conv_module``. The score tensors are freshly drawn with ``torch.randn_like``.

        Args:
            conv_module (nn.Conv1d): Module to be converted

        Returns:
            MaskedConv1d: Module with masked layers added to enable FedPM training
        """
        has_bias = conv_module.bias is not None
        # we create new variables below to make mypy happy since kernel_size has
        # type int | tuple[int] and kernel_size_ has type tuple[int]
        kernel_size_ = _single(conv_module.kernel_size)
        stride_ = _single(conv_module.stride)
        # String paddings (e.g. "same") must be passed through untouched.
        padding_ = conv_module.padding if isinstance(conv_module.padding, str) else _single(conv_module.padding)
        dilation_ = _single(conv_module.dilation)
        masked_conv_module = cls(
            in_channels=conv_module.in_channels,
            out_channels=conv_module.out_channels,
            kernel_size=kernel_size_,
            stride=stride_,
            padding=padding_,
            dilation=dilation_,
            groups=conv_module.groups,
            bias=has_bias,
            padding_mode=conv_module.padding_mode,
        )
        masked_conv_module.weight = Parameter(conv_module.weight.clone().detach(), requires_grad=False)
        # Re-create the scores from the copied weight so they match its shape, dtype and device.
        masked_conv_module.weight_scores = Parameter(torch.randn_like(masked_conv_module.weight), requires_grad=True)
        if has_bias:
            assert conv_module.bias is not None
            masked_conv_module.bias = Parameter(conv_module.bias.clone().detach(), requires_grad=False)
            masked_conv_module.bias_scores = Parameter(torch.randn_like(masked_conv_module.bias), requires_grad=True)
        return masked_conv_module
class MaskedConv2d(nn.Conv2d):
    """2D convolution whose frozen weight and bias are gated by sampled binary masks."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_2_t,
        stride: _size_2_t = 1,
        padding: str | _size_2_t = 0,
        dilation: _size_2_t = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = "zeros",
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        """
        Implementation of masked Conv2d layers.

        Like regular Conv2d layers (i.e., ``nn.Conv2d`` module), a masked convolutional layer has a weight (i.e.,
        convolutional filter) and a (optional) bias. However, the weight and the bias do not receive gradient in back
        propagation. Instead, two score tensors - one for the weight and another for the bias - are maintained. In the
        forward pass, the score tensors are transformed by the Sigmoid function into probability scores, which are
        then used to produce binary masks via Bernoulli sampling. Finally, the binary masks are applied to the weight
        and the bias. During training, gradients with respect to the score tensors are computed and used to update the
        score tensors.

        **NOTE:** The scores are not assumed to be bounded between 0 and 1.

        Args:
            in_channels (int): Number of channels in the input image
            out_channels (int): Number of channels produced by the convolution
            kernel_size (int or tuple): Size of the convolving kernel
            stride (int or tuple, optional): Stride of the convolution. Default: 1
            padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: 0
            dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
            groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
            bias (bool, optional): If ``True``, adds a bias (and a matching score tensor) to the layer.
                Default: ``True``
            padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
                Default: ``'zeros'``
            device (torch.device | None, optional): Forwarded unchanged to the ``nn.Conv2d`` constructor.
                Default: None
            dtype (torch.dtype | None, optional): Forwarded unchanged to the ``nn.Conv2d`` constructor.
                Default: None
        """
        # Attributes:
        #   weight: weights of the module (frozen).
        #   bias: bias of the module (frozen), or None when ``bias=False``.
        #   weight_scores: learnable scores for the weights. Has the same shape as weight.
        #   bias_scores: learnable scores for the bias. Has the same shape as bias (None when there is no bias).
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            padding_mode,
            device,
            dtype,
        )
        # The filter itself is frozen; only the scores below are trainable.
        self.weight.requires_grad = False
        self.weight_scores = Parameter(torch.randn_like(self.weight), requires_grad=True)
        if bias:
            assert self.bias is not None
            self.bias.requires_grad = False
            self.bias_scores = Parameter(torch.randn_like(self.bias), requires_grad=True)
        else:
            # Register the name anyway so that ``self.bias_scores`` resolves to None instead of raising.
            self.register_parameter("bias_scores", None)

    def forward(self, input: Tensor) -> Tensor:
        """
        Forward for the masked 2D convolution.

        A fresh mask is drawn on every call: scores go through a sigmoid, the resulting probabilities are passed to
        ``bernoulli_sample`` and the returned mask multiplies the frozen weight (and the bias, when present).

        Args:
            input (Tensor): input tensor for the layer

        Returns:
            Tensor: output tensor for the convolution
        """
        weight_prob_scores = torch.sigmoid(self.weight_scores)
        weight_mask = bernoulli_sample(weight_prob_scores)
        masked_weight = weight_mask * self.weight
        if self.bias is not None:
            bias_prob_scores = torch.sigmoid(self.bias_scores)
            bias_mask = bernoulli_sample(bias_prob_scores)
            masked_bias = bias_mask * self.bias
        else:
            masked_bias = None
        return self._conv_forward(input, weight=masked_weight, bias=masked_bias)

    @classmethod
    def from_pretrained(cls, conv_module: nn.Conv2d) -> MaskedConv2d:
        """
        Return an instance of ``MaskedConv2d`` whose weight and bias have the same values as those of
        ``conv_module``.

        The weight and bias are detached copies, so the returned module does not share storage with
        ``conv_module``. The score tensors are freshly drawn with ``torch.randn_like``.

        Args:
            conv_module (nn.Conv2d): Module to be converted

        Returns:
            MaskedConv2d: Module with masked layers to enable FedPM
        """
        has_bias = conv_module.bias is not None
        # Normalised copies keep mypy happy: the module attributes are typed int | tuple[int, int].
        kernel_size_ = _pair(conv_module.kernel_size)
        stride_ = _pair(conv_module.stride)
        # String paddings (e.g. "same") must be passed through untouched.
        padding_ = conv_module.padding if isinstance(conv_module.padding, str) else _pair(conv_module.padding)
        dilation_ = _pair(conv_module.dilation)
        masked_conv_module = cls(
            in_channels=conv_module.in_channels,
            out_channels=conv_module.out_channels,
            kernel_size=kernel_size_,
            stride=stride_,
            padding=padding_,
            dilation=dilation_,
            groups=conv_module.groups,
            bias=has_bias,
            padding_mode=conv_module.padding_mode,
        )
        masked_conv_module.weight = Parameter(conv_module.weight.clone().detach(), requires_grad=False)
        # Re-create the scores from the copied weight so they match its shape, dtype and device.
        masked_conv_module.weight_scores = Parameter(torch.randn_like(masked_conv_module.weight), requires_grad=True)
        if has_bias:
            assert conv_module.bias is not None
            masked_conv_module.bias = Parameter(conv_module.bias.clone().detach(), requires_grad=False)
            masked_conv_module.bias_scores = Parameter(torch.randn_like(masked_conv_module.bias), requires_grad=True)
        return masked_conv_module
class MaskedConv3d(nn.Conv3d):
    """3D convolution whose frozen weight and bias are gated by sampled binary masks."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_3_t,
        stride: _size_3_t = 1,
        padding: str | _size_3_t = 0,
        dilation: _size_3_t = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = "zeros",
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        """
        Implementation of masked Conv3d layers.

        Like regular Conv3d layers (i.e., ``nn.Conv3d`` module), a masked convolutional layer has a weight (i.e.,
        convolutional filter) and a (optional) bias. However, the weight and the bias do not receive gradient in back
        propagation. Instead, two score tensors - one for the weight and another for the bias - are maintained. In the
        forward pass, the score tensors are transformed by the Sigmoid function into probability scores, which are
        then used to produce binary masks via Bernoulli sampling. Finally, the binary masks are applied to the weight
        and the bias. During training, gradients with respect to the score tensors are computed and used to update the
        score tensors.

        **NOTE:** The scores are not assumed to be bounded between 0 and 1.

        Args:
            in_channels (int): Number of channels in the input image
            out_channels (int): Number of channels produced by the convolution
            kernel_size (int or tuple): Size of the convolving kernel
            stride (int or tuple, optional): Stride of the convolution. Default: 1
            padding (int, tuple or str, optional): Padding added to all six sides of the input. Default: 0
            dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
            groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
            bias (bool, optional): If ``True``, adds a bias (and a matching score tensor) to the layer.
                Default: ``True``
            padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
                Default: ``'zeros'``
            device (torch.device | None, optional): Forwarded unchanged to the ``nn.Conv3d`` constructor.
                Default: None
            dtype (torch.dtype | None, optional): Forwarded unchanged to the ``nn.Conv3d`` constructor.
                Default: None
        """
        # Attributes:
        #   weight: weights of the module (frozen).
        #   bias: bias of the module (frozen), or None when ``bias=False``.
        #   weight_scores: learnable scores for the weights. Has the same shape as weight.
        #   bias_scores: learnable scores for the bias. Has the same shape as bias (None when there is no bias).
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            padding_mode,
            device,
            dtype,
        )
        # The filter itself is frozen; only the scores below are trainable.
        self.weight.requires_grad = False
        self.weight_scores = Parameter(torch.randn_like(self.weight), requires_grad=True)
        if bias:
            assert self.bias is not None
            self.bias.requires_grad = False
            self.bias_scores = Parameter(torch.randn_like(self.bias), requires_grad=True)
        else:
            # Register the name anyway so that ``self.bias_scores`` resolves to None instead of raising.
            self.register_parameter("bias_scores", None)

    def forward(self, input: Tensor) -> Tensor:
        """
        Forward for the masked 3D convolution.

        A fresh mask is drawn on every call: scores go through a sigmoid, the resulting probabilities are passed to
        ``bernoulli_sample`` and the returned mask multiplies the frozen weight (and the bias, when present).

        Args:
            input (Tensor): input tensor for the layer

        Returns:
            Tensor: output tensor for the convolution
        """
        weight_prob_scores = torch.sigmoid(self.weight_scores)
        weight_mask = bernoulli_sample(weight_prob_scores)
        masked_weight = weight_mask * self.weight
        if self.bias is not None:
            bias_prob_scores = torch.sigmoid(self.bias_scores)
            bias_mask = bernoulli_sample(bias_prob_scores)
            masked_bias = bias_mask * self.bias
        else:
            masked_bias = None
        return self._conv_forward(input, weight=masked_weight, bias=masked_bias)

    @classmethod
    def from_pretrained(cls, conv_module: nn.Conv3d) -> MaskedConv3d:
        """
        Return an instance of ``MaskedConv3d`` whose weight and bias have the same values as those of
        ``conv_module``.

        The weight and bias are detached copies, so the returned module does not share storage with
        ``conv_module``. The score tensors are freshly drawn with ``torch.randn_like``.

        Args:
            conv_module (nn.Conv3d): Module to convert

        Returns:
            MaskedConv3d: Module with mask layers added to enable FedPM
        """
        has_bias = conv_module.bias is not None
        # Normalised copies keep mypy happy: the module attributes are typed int | tuple[int, int, int].
        kernel_size_ = _triple(conv_module.kernel_size)
        stride_ = _triple(conv_module.stride)
        # String paddings (e.g. "same") must be passed through untouched.
        padding_ = conv_module.padding if isinstance(conv_module.padding, str) else _triple(conv_module.padding)
        dilation_ = _triple(conv_module.dilation)
        masked_conv_module = cls(
            in_channels=conv_module.in_channels,
            out_channels=conv_module.out_channels,
            kernel_size=kernel_size_,
            stride=stride_,
            padding=padding_,
            dilation=dilation_,
            groups=conv_module.groups,
            bias=has_bias,
            padding_mode=conv_module.padding_mode,
        )
        masked_conv_module.weight = Parameter(conv_module.weight.clone().detach(), requires_grad=False)
        # Re-create the scores from the copied weight so they match its shape, dtype and device.
        masked_conv_module.weight_scores = Parameter(torch.randn_like(masked_conv_module.weight), requires_grad=True)
        if has_bias:
            assert conv_module.bias is not None
            masked_conv_module.bias = Parameter(conv_module.bias.clone().detach(), requires_grad=False)
            masked_conv_module.bias_scores = Parameter(torch.randn_like(masked_conv_module.bias), requires_grad=True)
        return masked_conv_module
class MaskedConvTranspose1d(nn.ConvTranspose1d):
    """1D transposed convolution whose frozen weight and bias are gated by sampled binary masks."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_1_t,
        stride: _size_1_t = 1,
        padding: _size_1_t = 0,
        output_padding: _size_1_t = 0,
        groups: int = 1,
        bias: bool = True,
        dilation: _size_1_t = 1,
        padding_mode: str = "zeros",
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        """
        Implementation of masked ``ConvTranspose1d`` layers. For more information on transposed convolution, please
        see the PyTorch documentation of ``nn.ConvTranspose1d``.

        Like regular ``ConvTranspose1d`` layers (i.e., ``nn.ConvTranspose1d`` module), a masked transpose
        convolutional layer has a weight (i.e., convolutional filter) and a (optional) bias. However, the weight and
        the bias do not receive gradient in back propagation. Instead, two score tensors - one for the weight and
        another for the bias - are maintained. In the forward pass, the score tensors are transformed by the Sigmoid
        function into probability scores, which are then used to produce binary masks via Bernoulli sampling. Finally,
        the binary masks are applied to the weight and the bias. During training, gradients with respect to the score
        tensors are computed and used to update the score tensors.

        **NOTE:** The scores are not assumed to be bounded between 0 and 1.

        Args:
            in_channels (int): Number of channels in the input image
            out_channels (int): Number of channels produced by the transposed convolution
            kernel_size (int or tuple): Size of the convolving kernel
            stride (int or tuple, optional): Stride of the convolution. Default: 1
            padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding will be added
                to both sides of the input. Default: 0
            output_padding (int or tuple, optional): Additional size added to one side of the output shape.
                Default: 0
            groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
            bias (bool, optional): If ``True``, adds a bias (and a matching score tensor) to the layer.
                Default: ``True``
            dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
            padding_mode (str, optional): Only ``'zeros'`` is accepted by ``forward``. Default: ``'zeros'``
            device (torch.device | None, optional): Forwarded unchanged to the ``nn.ConvTranspose1d`` constructor.
                Default: None
            dtype (torch.dtype | None, optional): Forwarded unchanged to the ``nn.ConvTranspose1d`` constructor.
                Default: None
        """
        # Attributes:
        #   weight (Tensor): weights of the module (frozen).
        #   bias (Tensor): bias of the module (frozen), or None when ``bias=False``.
        #   weight_scores: learnable scores for the weights. Has the same shape as weight.
        #   bias_scores: learnable scores for the bias. Has the same shape as bias (None when there is no bias).
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            output_padding,
            groups,
            bias,
            dilation,
            padding_mode,
            device,
            dtype,
        )
        # The filter itself is frozen; only the scores below are trainable.
        self.weight.requires_grad = False
        self.weight_scores = Parameter(torch.randn_like(self.weight), requires_grad=True)
        if bias:
            assert self.bias is not None
            self.bias.requires_grad = False
            self.bias_scores = Parameter(torch.randn_like(self.bias), requires_grad=True)
        else:
            # Register the name anyway so that ``self.bias_scores`` resolves to None instead of raising.
            self.register_parameter("bias_scores", None)

    def forward(self, input: Tensor, output_size: list[int] | None = None) -> Tensor:
        """
        Forward for the ``MaskedConvTranspose1d``.

        A fresh mask is drawn on every call: scores go through a sigmoid, the resulting probabilities are passed to
        ``bernoulli_sample`` and the returned mask multiplies the frozen weight (and the bias, when present).

        Args:
            input (Tensor): input to be mapped with the module
            output_size (list[int] | None, optional): Desired output size from the transpose. Defaults to None.

        Raises:
            ValueError: If something other than "zeros" padding has been requested.

        Returns:
            Tensor: Output tensor.
        """
        # Note: the same check is already present in super().__init__
        if self.padding_mode != "zeros":
            raise ValueError("Only `zeros` padding mode is supported for ConvTranspose1d")

        assert isinstance(self.padding, tuple)
        # (The type ignore below is just used to resolve some small typing issue.)
        # One cannot replace List by Tuple or Sequence in "_output_padding"
        # because TorchScript does not support `Sequence[T]` or `tuple[T, ...]`.
        output_padding = self._output_padding(
            input,
            output_size,
            self.stride,  # type: ignore[arg-type]
            self.padding,  # type: ignore[arg-type]
            self.kernel_size,  # type: ignore[arg-type]
            num_spatial_dims=1,
            dilation=self.dilation,  # type: ignore[arg-type]
        )

        weight_prob_scores = torch.sigmoid(self.weight_scores)
        weight_mask = bernoulli_sample(weight_prob_scores)
        masked_weight = weight_mask * self.weight
        if self.bias is not None:
            bias_prob_scores = torch.sigmoid(self.bias_scores)
            bias_mask = bernoulli_sample(bias_prob_scores)
            masked_bias = bias_mask * self.bias
        else:
            masked_bias = None
        return F.conv_transpose1d(
            input, masked_weight, masked_bias, self.stride, self.padding, output_padding, self.groups, self.dilation
        )

    @classmethod
    def from_pretrained(cls, conv_module: nn.ConvTranspose1d) -> MaskedConvTranspose1d:
        """
        Return an instance of ``MaskedConvTranspose1d`` whose weight and bias have the same values as those of
        ``conv_module``.

        The weight and bias are detached copies, so the returned module does not share storage with
        ``conv_module``. The score tensors are freshly drawn with ``torch.randn_like``.

        Args:
            conv_module (nn.ConvTranspose1d): Target module to be converted

        Returns:
            MaskedConvTranspose1d: Module with masked layers to enable FedPM
        """
        has_bias = conv_module.bias is not None
        # we create new variables below to make mypy happy since kernel_size has
        # type int | tuple[int] and kernel_size_ has type tuple[int]
        kernel_size_ = _single(conv_module.kernel_size)
        stride_ = _single(conv_module.stride)
        padding_ = _single(conv_module.padding)
        dilation_ = _single(conv_module.dilation)
        output_padding_ = _single(conv_module.output_padding)
        masked_conv_module = cls(
            in_channels=conv_module.in_channels,
            out_channels=conv_module.out_channels,
            kernel_size=kernel_size_,
            stride=stride_,
            padding=padding_,
            output_padding=output_padding_,
            groups=conv_module.groups,
            bias=has_bias,
            dilation=dilation_,
            padding_mode=conv_module.padding_mode,
        )
        masked_conv_module.weight = Parameter(conv_module.weight.clone().detach(), requires_grad=False)
        # Re-create the scores from the copied weight so they match its shape, dtype and device.
        masked_conv_module.weight_scores = Parameter(torch.randn_like(masked_conv_module.weight), requires_grad=True)
        if has_bias:
            assert conv_module.bias is not None
            masked_conv_module.bias = Parameter(conv_module.bias.clone().detach(), requires_grad=False)
            masked_conv_module.bias_scores = Parameter(torch.randn_like(masked_conv_module.bias), requires_grad=True)
        return masked_conv_module
class MaskedConvTranspose2d(nn.ConvTranspose2d):
    """2D transposed convolution whose frozen weight and bias are gated by sampled binary masks."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_2_t,
        stride: _size_2_t = 1,
        padding: _size_2_t = 0,
        output_padding: _size_2_t = 0,
        groups: int = 1,
        bias: bool = True,
        dilation: _size_2_t = 1,
        padding_mode: str = "zeros",
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        """
        Implementation of masked ``ConvTranspose2d`` layers. For more information on transposed convolution, please
        see the PyTorch documentation of ``nn.ConvTranspose2d``.

        Like regular ``ConvTranspose2d`` layers (i.e., ``nn.ConvTranspose2d`` module), a masked transpose
        convolutional layer has a weight (i.e., convolutional filter) and a (optional) bias. However, the weight and
        the bias do not receive gradient in back propagation. Instead, two score tensors - one for the weight and
        another for the bias - are maintained. In the forward pass, the score tensors are transformed by the Sigmoid
        function into probability scores, which are then used to produce binary masks via Bernoulli sampling. Finally,
        the binary masks are applied to the weight and the bias. During training, gradients with respect to the score
        tensors are computed and used to update the score tensors.

        **NOTE:** The scores are not assumed to be bounded between 0 and 1.

        Args:
            in_channels (int): Number of channels in the input image
            out_channels (int): Number of channels produced by the transposed convolution
            kernel_size (int or tuple): Size of the convolving kernel
            stride (int or tuple, optional): Stride of the convolution. Default: 1
            padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding will be added
                to both sides of each dimension in the input. Default: 0
            output_padding (int or tuple, optional): Additional size added to one side of each dimension in the
                output shape. Default: 0
            groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
            bias (bool, optional): If ``True``, adds a bias (and a matching score tensor) to the layer.
                Default: ``True``
            dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
            padding_mode (str, optional): Only ``'zeros'`` is accepted by ``forward``. Default: ``'zeros'``
            device (torch.device | None, optional): Forwarded unchanged to the ``nn.ConvTranspose2d`` constructor.
                Default: None
            dtype (torch.dtype | None, optional): Forwarded unchanged to the ``nn.ConvTranspose2d`` constructor.
                Default: None
        """
        # Attributes:
        #   weight (Tensor): weights of the module (frozen).
        #   bias (Tensor): bias of the module (frozen), or None when ``bias=False``.
        #   weight_scores: learnable scores for the weights. Has the same shape as weight.
        #   bias_scores: learnable scores for the bias. Has the same shape as bias (None when there is no bias).
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            output_padding,
            groups,
            bias,
            dilation,
            padding_mode,
            device,
            dtype,
        )
        # The filter itself is frozen; only the scores below are trainable.
        self.weight.requires_grad = False
        self.weight_scores = Parameter(torch.randn_like(self.weight), requires_grad=True)
        if bias:
            assert self.bias is not None
            self.bias.requires_grad = False
            self.bias_scores = Parameter(torch.randn_like(self.bias), requires_grad=True)
        else:
            # Register the name anyway so that ``self.bias_scores`` resolves to None instead of raising.
            self.register_parameter("bias_scores", None)

    def forward(self, input: Tensor, output_size: list[int] | None = None) -> Tensor:
        """
        Maps input tensor through the ``MaskedConvTranspose2d`` module.

        A fresh mask is drawn on every call: scores go through a sigmoid, the resulting probabilities are passed to
        ``bernoulli_sample`` and the returned mask multiplies the frozen weight (and the bias, when present).

        Args:
            input (Tensor): tensor to be mapped
            output_size (list[int] | None, optional): Desired output size from the module. Defaults to None.

        Raises:
            ValueError: Thrown if anything except "zeros" padding is requested

        Returns:
            Tensor: Mapped tensor
        """
        # Note: the same check is already present in super().__init__
        if self.padding_mode != "zeros":
            # The message names this layer's own base class (it previously said ConvTranspose1d).
            raise ValueError("Only `zeros` padding mode is supported for ConvTranspose2d")

        assert isinstance(self.padding, tuple)
        # The type ignores only silence the mismatch between the tuple-typed
        # attributes and the list-typed parameters of "_output_padding".
        output_padding = self._output_padding(
            input,
            output_size,
            self.stride,  # type: ignore[arg-type]
            self.padding,  # type: ignore[arg-type]
            self.kernel_size,  # type: ignore[arg-type]
            num_spatial_dims=2,
            dilation=self.dilation,  # type: ignore[arg-type]
        )

        weight_prob_scores = torch.sigmoid(self.weight_scores)
        weight_mask = bernoulli_sample(weight_prob_scores)
        masked_weight = weight_mask * self.weight
        if self.bias is not None:
            bias_prob_scores = torch.sigmoid(self.bias_scores)
            bias_mask = bernoulli_sample(bias_prob_scores)
            masked_bias = bias_mask * self.bias
        else:
            masked_bias = None
        return F.conv_transpose2d(
            input, masked_weight, masked_bias, self.stride, self.padding, output_padding, self.groups, self.dilation
        )

    @classmethod
    def from_pretrained(cls, conv_module: nn.ConvTranspose2d) -> MaskedConvTranspose2d:
        """
        Return an instance of ``MaskedConvTranspose2d`` whose weight and bias have the same values as those of
        ``conv_module``.

        The weight and bias are detached copies, so the returned module does not share storage with
        ``conv_module``. The score tensors are freshly drawn with ``torch.randn_like``.

        Args:
            conv_module (nn.ConvTranspose2d): Target module to be converted

        Returns:
            MaskedConvTranspose2d: Module with mask layers added to enable FedPM
        """
        has_bias = conv_module.bias is not None
        # we create new variables below to make mypy happy since kernel_size has
        # type int | tuple[int, int] and kernel_size_ has type tuple[int, int]
        kernel_size_ = _pair(conv_module.kernel_size)
        stride_ = _pair(conv_module.stride)
        padding_ = _pair(conv_module.padding)
        dilation_ = _pair(conv_module.dilation)
        output_padding_ = _pair(conv_module.output_padding)
        masked_conv_module = cls(
            in_channels=conv_module.in_channels,
            out_channels=conv_module.out_channels,
            kernel_size=kernel_size_,
            stride=stride_,
            padding=padding_,
            output_padding=output_padding_,
            groups=conv_module.groups,
            bias=has_bias,
            dilation=dilation_,
            padding_mode=conv_module.padding_mode,
        )
        masked_conv_module.weight = Parameter(conv_module.weight.clone().detach(), requires_grad=False)
        # Re-create the scores from the copied weight so they match its shape, dtype and device.
        masked_conv_module.weight_scores = Parameter(torch.randn_like(masked_conv_module.weight), requires_grad=True)
        if has_bias:
            assert conv_module.bias is not None
            masked_conv_module.bias = Parameter(conv_module.bias.clone().detach(), requires_grad=False)
            masked_conv_module.bias_scores = Parameter(torch.randn_like(masked_conv_module.bias), requires_grad=True)
        return masked_conv_module
class MaskedConvTranspose3d(nn.ConvTranspose3d):
    """3D transposed convolution whose frozen weight and bias are gated by sampled binary masks."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_3_t,
        stride: _size_3_t = 1,
        padding: _size_3_t = 0,
        output_padding: _size_3_t = 0,
        groups: int = 1,
        bias: bool = True,
        dilation: _size_3_t = 1,
        padding_mode: str = "zeros",
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        """
        Implementation of masked ``ConvTranspose3d`` layers. For more information on transposed convolution, please
        see the PyTorch documentation of ``nn.ConvTranspose3d``.

        Like regular ``ConvTranspose3d`` layers (i.e., ``nn.ConvTranspose3d`` module), a masked transpose
        convolutional layer has a weight (i.e., convolutional filter) and a (optional) bias. However, the weight and
        the bias do not receive gradient in back propagation. Instead, two score tensors - one for the weight and
        another for the bias - are maintained. In the forward pass, the score tensors are transformed by the Sigmoid
        function into probability scores, which are then used to produce binary masks via Bernoulli sampling. Finally,
        the binary masks are applied to the weight and the bias. During training, gradients with respect to the score
        tensors are computed and used to update the score tensors.

        **NOTE:** The scores are not assumed to be bounded between 0 and 1.

        Args:
            in_channels (int): Number of channels in the input image
            out_channels (int): Number of channels produced by the transposed convolution
            kernel_size (int or tuple): Size of the convolving kernel
            stride (int or tuple, optional): Stride of the convolution. Default: 1
            padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding will be added
                to both sides of each dimension in the input. Default: 0
            output_padding (int or tuple, optional): Additional size added to one side of each dimension in the
                output shape. Default: 0
            groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
            bias (bool, optional): If ``True``, adds a bias (and a matching score tensor) to the layer.
                Default: ``True``
            dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
            padding_mode (str, optional): Only ``'zeros'`` is accepted by ``forward``. Default: ``'zeros'``
            device (torch.device | None, optional): Forwarded unchanged to the ``nn.ConvTranspose3d`` constructor.
                Default: None
            dtype (torch.dtype | None, optional): Forwarded unchanged to the ``nn.ConvTranspose3d`` constructor.
                Default: None
        """
        # Attributes:
        #   weight (Tensor): weights of the module (frozen).
        #   bias (Tensor): bias of the module (frozen), or None when ``bias=False``.
        #   weight_scores: learnable scores for the weights. Has the same shape as weight.
        #   bias_scores: learnable scores for the bias. Has the same shape as bias (None when there is no bias).
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            output_padding,
            groups,
            bias,
            dilation,
            padding_mode,
            device,
            dtype,
        )
        # The filter itself is frozen; only the scores below are trainable.
        self.weight.requires_grad = False
        self.weight_scores = Parameter(torch.randn_like(self.weight), requires_grad=True)
        if bias:
            assert self.bias is not None
            self.bias.requires_grad = False
            self.bias_scores = Parameter(torch.randn_like(self.bias), requires_grad=True)
        else:
            # Register the name anyway so that ``self.bias_scores`` resolves to None instead of raising.
            self.register_parameter("bias_scores", None)

    def forward(self, input: Tensor, output_size: list[int] | None = None) -> Tensor:
        """
        Maps the input tensor with ``MaskedConvTranspose3d``.

        A fresh mask is drawn on every call: scores go through a sigmoid, the resulting probabilities are passed to
        ``bernoulli_sample`` and the returned mask multiplies the frozen weight (and the bias, when present).

        Args:
            input (Tensor): Tensor to be mapped
            output_size (list[int] | None, optional): Desired output size from the transpose. Defaults to None.

        Raises:
            ValueError: Throws if anything except "zeros" padding is requested

        Returns:
            Tensor: Mapped tensor
        """
        # Note: the same check is already present in super().__init__
        if self.padding_mode != "zeros":
            # The message names this layer's own base class (it previously said ConvTranspose1d).
            raise ValueError("Only `zeros` padding mode is supported for ConvTranspose3d")

        assert isinstance(self.padding, tuple)
        # The type ignores only silence the mismatch between the tuple-typed
        # attributes and the list-typed parameters of "_output_padding".
        output_padding = self._output_padding(
            input,
            output_size,
            self.stride,  # type: ignore[arg-type]
            self.padding,  # type: ignore[arg-type]
            self.kernel_size,  # type: ignore[arg-type]
            num_spatial_dims=3,
            dilation=self.dilation,  # type: ignore[arg-type]
        )

        weight_prob_scores = torch.sigmoid(self.weight_scores)
        weight_mask = bernoulli_sample(weight_prob_scores)
        masked_weight = weight_mask * self.weight
        if self.bias is not None:
            bias_prob_scores = torch.sigmoid(self.bias_scores)
            bias_mask = bernoulli_sample(bias_prob_scores)
            masked_bias = bias_mask * self.bias
        else:
            masked_bias = None
        return F.conv_transpose3d(
            input, masked_weight, masked_bias, self.stride, self.padding, output_padding, self.groups, self.dilation
        )

    @classmethod
    def from_pretrained(cls, conv_module: nn.ConvTranspose3d) -> MaskedConvTranspose3d:
        """
        Return an instance of ``MaskedConvTranspose3d`` whose weight and bias have the same values as those of
        ``conv_module``.

        The weight and bias are detached copies, so the returned module does not share storage with
        ``conv_module``. The score tensors are freshly drawn with ``torch.randn_like``.

        Args:
            conv_module (nn.ConvTranspose3d): Target module to be converted

        Returns:
            MaskedConvTranspose3d: Module with masked layers added to enable FedPM
        """
        has_bias = conv_module.bias is not None
        # we create new variables below to make mypy happy since kernel_size has
        # type int | tuple[int, int, int] and kernel_size_ has type tuple[int, int, int]
        kernel_size_ = _triple(conv_module.kernel_size)
        stride_ = _triple(conv_module.stride)
        padding_ = _triple(conv_module.padding)
        dilation_ = _triple(conv_module.dilation)
        output_padding_ = _triple(conv_module.output_padding)
        masked_conv_module = cls(
            in_channels=conv_module.in_channels,
            out_channels=conv_module.out_channels,
            kernel_size=kernel_size_,
            stride=stride_,
            padding=padding_,
            output_padding=output_padding_,
            groups=conv_module.groups,
            bias=has_bias,
            dilation=dilation_,
            padding_mode=conv_module.padding_mode,
        )
        masked_conv_module.weight = Parameter(conv_module.weight.clone().detach(), requires_grad=False)
        # Re-create the scores from the copied weight so they match its shape, dtype and device.
        masked_conv_module.weight_scores = Parameter(torch.randn_like(masked_conv_module.weight), requires_grad=True)
        if has_bias:
            assert conv_module.bias is not None
            masked_conv_module.bias = Parameter(conv_module.bias.clone().detach(), requires_grad=False)
            masked_conv_module.bias_scores = Parameter(torch.randn_like(masked_conv_module.bias), requires_grad=True)
        return masked_conv_module