I wrote a simple 3D U-Net architecture in PyTorch to do segmentation on 3D images.
import torch
import torch.nn as nn

class UNet3D(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(UNet3D, self).__init__()
        # Encoder: two 3x3x3 convs, then downsample by 2
        self.encoder = nn.Sequential(
            nn.Conv3d(in_channels, 64, kernel_size=(3, 3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv3d(64, 64, kernel_size=(3, 3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2)
        )
        # Middle: two 3x3x3 convs, then downsample by 2
        self.middle = nn.Sequential(
            nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv3d(128, 128, kernel_size=(3, 3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2)
        )
        # Decoder: each transposed conv with stride 2 upsamples by 2
        self.decoder = nn.Sequential(
            nn.ConvTranspose3d(128, 64, kernel_size=(2, 2, 2), stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose3d(64, 64, kernel_size=(3, 3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose3d(64, out_channels, kernel_size=(2, 2, 2), stride=2)
        )

    def forward(self, x):
        # x is expected to be a 5D tensor: (N, C, D, H, W)
        print(x.shape)
        print(type(x))
        x1 = self.encoder(x)
        x2 = self.middle(x1)
        print(x2.shape)
        x3 = self.decoder(x2)
        return x3
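Instantiating it and running a proper 5D volume through works as a quick sanity check (the batch size of 1 and the 128x128x128 size here are just for illustration):

model = UNet3D(in_channels=1, out_channels=1)
vol = torch.randn(1, 1, 128, 128, 128)  # (N, C, D, H, W)
out = model(vol)
print(out.shape)  # torch.Size([1, 1, 128, 128, 128])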
My images are single-channel and rectangular, so I am resizing them to 512x512:
from torchvision import transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((512, 512))
])
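Here is what a single transformed image looks like (a quick check; the file name is just a placeholder):

from PIL import Image

img = Image.open("slice_000.png").convert("L")  # placeholder grayscale image
t = transform(img)
print(t.shape)  # torch.Size([1, 512, 512]) -> (C, H, W), no depth dimension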
The architecture summary looks fine to me:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv3d-1    [-1, 64, 128, 128, 128]           1,792
              ReLU-2    [-1, 64, 128, 128, 128]               0
            Conv3d-3    [-1, 64, 128, 128, 128]         110,656
              ReLU-4    [-1, 64, 128, 128, 128]               0
         MaxPool3d-5       [-1, 64, 64, 64, 64]               0
            Conv3d-6      [-1, 128, 64, 64, 64]         221,312
              ReLU-7      [-1, 128, 64, 64, 64]               0
            Conv3d-8      [-1, 128, 64, 64, 64]         442,496
              ReLU-9      [-1, 128, 64, 64, 64]               0
        MaxPool3d-10      [-1, 128, 32, 32, 32]               0
  ConvTranspose3d-11       [-1, 64, 64, 64, 64]          65,600
             ReLU-12       [-1, 64, 64, 64, 64]               0
  ConvTranspose3d-13       [-1, 64, 64, 64, 64]         110,656
             ReLU-14       [-1, 64, 64, 64, 64]               0
  ConvTranspose3d-15     [-1, 1, 128, 128, 128]             513
================================================================
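(For reference, this summary can be reproduced with torchsummary and a 128x128x128 dummy input, roughly like this:)

from torchsummary import summary

model = UNet3D(in_channels=1, out_channels=1)
summary(model, input_size=(1, 128, 128, 128), device="cpu")  # (C, D, H, W); torchsummary adds the batch dim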
but I am getting the following error when I try to train the model:
Given input size: (64x1x512x512). Calculated output size: (64x0x256x256). Output size is too small
Any idea how to fix it and what is going on?

PS: the error comes from the MaxPool3d layer in the encoder part.
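I can reproduce the same error in isolation whenever the tensor reaching the pooling layer has a depth of 1 (a minimal sketch; the exact shape (N, 64, 1, 512, 512) is my reading of the error message):

import torch
import torch.nn as nn

pool = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2)
x = torch.randn(1, 64, 1, 512, 512)  # (N, C, D, H, W) with depth D=1
y = pool(x)
# RuntimeError: Given input size: (64x1x512x512).
# Calculated output size: (64x0x256x256). Output size is too small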