PyTorch: RuntimeError: The size of tensor a (224) must match the size of tensor b (244) at non-singleton dimension 3

I want to create and train an autoencoder to extract features, and then use those features for clustering algorithms. Right now I am getting an error while calculating the loss.

RuntimeError: The size of tensor a (224) must match the size of tensor b (244) at non-singleton dimension 3

and a warning

UserWarning: Using a target size (torch.Size([1, 3, 224, 244])) that is different to the input size (torch.Size([1, 3, 224, 224])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.

  return F.mse_loss(input, target, reduction=self.reduction)

I am using PyTorch.

Can anyone tell me what is wrong with this? As far as I can tell, the input and output sizes in the warning and the error are the same, yet it says they are different. The model summary and the input and output image shapes are as follows:

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 16, 112, 112]             448
              ReLU-2         [-1, 16, 112, 112]               0
            Conv2d-3           [-1, 32, 56, 56]           4,640
              ReLU-4           [-1, 32, 56, 56]               0
            Conv2d-5           [-1, 64, 18, 18]         100,416
              ReLU-6           [-1, 64, 18, 18]               0
            Conv2d-7            [-1, 128, 3, 3]         401,536
              ReLU-8            [-1, 128, 3, 3]               0
            Conv2d-9            [-1, 256, 1, 1]         295,168
  ConvTranspose2d-10            [-1, 128, 3, 3]         295,040
             ReLU-11            [-1, 128, 3, 3]               0
  ConvTranspose2d-12           [-1, 64, 12, 12]         401,472
             ReLU-13           [-1, 64, 12, 12]               0
  ConvTranspose2d-14           [-1, 24, 28, 28]          75,288
             ReLU-15           [-1, 24, 28, 28]               0
  ConvTranspose2d-16           [-1, 16, 56, 56]           3,472
             ReLU-17           [-1, 16, 56, 56]               0
  ConvTranspose2d-18          [-1, 8, 111, 111]           1,160
             ReLU-19          [-1, 8, 111, 111]               0
  ConvTranspose2d-20          [-1, 3, 224, 224]             603
          Sigmoid-21          [-1, 3, 224, 224]               0
================================================================
Total params: 1,579,243
Trainable params: 1,579,243
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 9.94
Params size (MB): 6.02
Estimated Total Size (MB): 16.54
----------------------------------------------------------------
Min Value of input Image =  tensor(0.0627)
Max Value of input Image =  tensor(0.5098)
Input Image shape =  torch.Size([1, 3, 224, 244])
Output Image shape =  torch.Size([1, 3, 224, 224])

My autoencoder class is:

class autoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel images.

    The encoder is a stack of strided convolutions and the decoder is a stack
    of transposed convolutions ending in a sigmoid, so outputs lie in [0, 1].

    The shape comments on each layer are the per-sample (channels, height,
    width) outputs reported by the model summary for a (3, 224, 224) input.
    """

    def __init__(self):
        super().__init__()

        encoder_layers = [
            nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1),     # 16 x 112 x 112
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),    # 32 x 56 x 56
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=7, stride=3, padding=1),    # 64 x 18 x 18
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=7, stride=5, padding=1),   # 128 x 3 x 3
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, stride=5, padding=1),  # 256 x 1 x 1 (bottleneck)
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(256, 128, kernel_size=3),              # 128 x 3 x 3
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, kernel_size=7, stride=3,
                               padding=1, output_padding=1),          # 64 x 12 x 12
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 24, kernel_size=7, stride=2,
                               padding=1, output_padding=1),          # 24 x 28 x 28
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(24, 16, kernel_size=3, stride=2,
                               padding=1, output_padding=1),          # 16 x 56 x 56
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=3, stride=2,
                               padding=1),                            # 8 x 111 x 111
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 3, kernel_size=5, stride=2,
                               padding=1, output_padding=1),          # 3 x 224 x 224
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the bottleneck and return its reconstruction."""
        return self.decoder(self.encoder(x))

and the training code is as follows:

# Training script: split the image folder into train / hold-out subsets,
# build the autoencoder, and optimise the MSE reconstruction loss.

# Keep a handle on the full dataset. Both subsets must index into it, because
# `indices` is a permutation of range(len(full_dataset)).
full_dataset = DatasetLoader('E:/DAL/Dataset/Images', get_transform(train=True))

torch.manual_seed(1)
indices = torch.randperm(len(full_dataset)).tolist()
# The last 50 shuffled samples are held out; everything else is used for training.
dataset = torch.utils.data.Subset(full_dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(full_dataset, indices[-50:])

data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=0)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0)


model = autoencoder().cuda()
summary(model, (3, 224, 224))

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

total_loss = 0
for epoch in range(num_epochs):
    # Sum of the per-batch losses of this epoch only (reset every epoch).
    epoch_loss = 0.0
    for img in data_loader:
        print("Min Value of input Image = ", torch.min(img))
        print("Max Value of input Image = ", torch.max(img))
        # Tensors carry autograd state themselves; no Variable wrapper is needed.
        img = img.cuda()
        # ===================forward=====================
        output = model(img)
        print("Input Image shape = ", img.shape)
        print("Output Image shape = ", output.shape)
        # The reconstruction target is the input itself, so `output` and `img`
        # must have identical shapes.
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # .item() yields a Python float, so no graph or GPU tensor is retained.
        epoch_loss += loss.item()
    # ===================log========================
    # Report the mean per-batch loss of this epoch.
    total_loss = epoch_loss / len(data_loader)
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch+1, num_epochs, total_loss))
    if epoch % 10 == 0:
        # Save the reconstruction of the last batch as a visual progress check.
        pic = to_img(output.cpu().data)
        save_image(pic, './dc_img/image_{}.png'.format(epoch))

torch.save(model.state_dict(), './conv_autoencoder.pth')

The dataset class and transform function are as follows:

def get_transform(train):
    """Return the composed preprocessing pipeline for one image.

    The pipeline resizes the image, optionally applies random horizontal and
    vertical flips (each with probability 0.5) when ``train`` is truthy, and
    finally converts the image to a tensor.
    """
    # NOTE(review): this resizes to 224 x 244, whereas the model summary and
    # the printed output shape are 224 x 224. The reconstruction loss needs
    # the two shapes to be identical, so confirm that 244 is intended.
    steps = [T.Resize((224,244))]
    if train:
        steps += [T.RandomHorizontalFlip(0.5), T.RandomVerticalFlip(0.5)]
    steps.append(T.ToTensor())
    return T.Compose(steps)

class DatasetLoader(torch.utils.data.Dataset):
    """Dataset that serves every entry of a directory as an RGB image.

    Args:
        root: Directory whose entries are treated as image files.
        transforms: Optional callable applied to each loaded image.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # Sorted so that a given index always maps to the same file name.
        self.imgs = sorted(os.listdir(root))

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        """Load the ``idx``-th file as RGB and apply the transforms, if any."""
        picture = Image.open(os.path.join(self.root, self.imgs[idx])).convert("RGB")
        if self.transforms is None:
            return picture
        return self.transforms(picture)

Answer

I'm pretty sure you have a typo in your get_transform function:

transforms.append(T.Resize((224,244)))

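You probably wanted to resize it to (224, 224) instead of (224, 244). The autoencoder reconstructs 224×224 images, so the input, which is also the loss target, has to be 224×224 as well.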