How to handle “ValueError: Found input variables with inconsistent numbers of samples: [16979, 16976]” while creating confusion matrix?

How do I handle "ValueError: Found input variables with inconsistent numbers of samples: [16979, 16976]" when creating a confusion matrix?

I have checked several other posts but could not find an answer, so please help. I am training a CNN model and get the error above when I compute the confusion matrix. I am new to CNN classifiers and to the sklearn library.

   # Training-time generator: rescales pixel values by 1/255, applies shear,
   # zoom and horizontal-flip augmentation, and reserves a validation split.
   train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True,
                                   validation_split=0.2)  # hold out 20% of the images as validation data

    # Test-time generator: only the 1/255 rescaling is configured here.
    test_datagen = ImageDataGenerator(rescale = 1./255)

    # Training subset of train_dataset_dir, shuffled, resized to 86x86.
    train_data = train_datagen.flow_from_directory(train_dataset_dir,
                                                 target_size = (86, 86),
                                                 batch_size = batch_Size,
                                                 class_mode = 'categorical',
                                                 shuffle = True,
                                                 subset = 'training')

    # Validation subset, taken from the same directory through the same
    # generator (so it shares the augmentation settings above).
    valid_data = train_datagen.flow_from_directory(train_dataset_dir,
                                                 target_size = (86, 86),
                                                 batch_size = batch_Size,
                                                 class_mode = 'categorical',
                                                 shuffle = True,
                                                 subset = 'validation')

    # Test data from test_data_dir, created with shuffle=False and
    # class_mode=None.
    # NOTE(review): presumably this yields image batches without labels, in a
    # fixed file order that lines up with test_data.classes -- confirm against
    # the Keras flow_from_directory documentation.
    test_data = test_datagen.flow_from_directory(test_data_dir,
                                            target_size = (86, 86),
                                            batch_size = batch_Size,
                                            shuffle = False,
                                            class_mode = None)
    # Show the class-name -> index mapping reported by the training generator.
    print(train_data.class_indices)

    # Number of whole batches per epoch; the floor division ignores any final
    # partial batch of the training and validation generators.
    STEP_SIZE_TRAIN=train_data.n//train_data.batch_size
    STEP_SIZE_VALID=valid_data.n//valid_data.batch_size
    # Train for 10 epochs, validating on valid_data; the returned value is
    # kept in `history`.
    history = model.fit(train_data,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_data,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=10
    )

    # Predict the class of every test image.
    import numpy as np

    # NOTE(review): the floor division counts only whole batches, so a final
    # partial batch is left out of the prediction run. In the output shown for
    # this script that yields 16976 predictions for 16979 test samples.
    STEP_SIZE_TEST=test_data.n//test_data.batch_size
    # presumably rewinds the generator to its first batch -- confirm
    test_data.reset()
    y_pred=model.predict_generator(test_data,
    steps=STEP_SIZE_TEST,
    verbose=1)

    # Index of the largest value along axis 1 of y_pred, used as the
    # predicted class index for each row.
    predicted_class=np.argmax(y_pred,axis=1)
    print(predicted_class)

    # Confusion matrix and classification report
    import sklearn.metrics as metrics


    # Labels taken from the test generator's `classes` attribute; the output
    # shows this has shape (16979,), i.e. more entries than predicted_class.
    true_class = test_data.classes
    #true_class = tf.concat([y for y in test_data], axis=0)
    # Print both shapes to compare the number of labels and predictions.
    print(true_class.shape)
    print(predicted_class.shape)

    print(true_class)
    # Class names, taken from the keys of the test generator's class_indices.
    class_labels = list(test_data.class_indices.keys())  
    print(class_labels)

    print('Confusion Matrix')
    # This call raises the ValueError shown in the traceback, because
    # true_class and predicted_class have different lengths.
    cm = metrics.confusion_matrix(true_class, predicted_class)
    print(cm)

Output:

(16979,)
(16976,)
[0 0 0 ... 1 1 1]
['closed', 'open']
Confusion Matrix
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_16552/2876274369.py in <module>
     13 
     14 print('Confusion Matrix')
---> 15 cm = metrics.confusion_matrix(true_class, predicted_class)
     16 print(cm)
     17 

p:\conda\envs\PRML\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     70                           FutureWarning)
     71         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72         return f(**kwargs)
     73     return inner_f
     74 

p:\conda\envs\PRML\lib\site-packages\sklearn\metrics\_classification.py in confusion_matrix(y_true, y_pred, labels, sample_weight, normalize)
    274 
    275     """
--> 276     y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    277     if y_type not in ("binary", "multiclass"):
    278         raise ValueError("%s is not supported" % y_type)

p:\conda\envs\PRML\lib\site-packages\sklearn\metrics\_classification.py in _check_targets(y_true, y_pred)
     79     y_pred : array or indicator matrix
     80     """
---> 81     check_consistent_length(y_true, y_pred)
     82     type_true = type_of_target(y_true)
     83     type_pred = type_of_target(y_pred)

p:\conda\envs\PRML\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
    254     if len(uniques) > 1:
    255         raise ValueError("Found input variables with inconsistent numbers of"
--> 256                          " samples: %r" % [int(l) for l in lengths])
    257 
    258 

ValueError: Found input variables with inconsistent numbers of samples: [16979, 16976]

Answer

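The problem is the steps argument passed to predict_generator: test_data.n // test_data.batch_size rounds down, so the final partial batch (here the last 3 of the 16979 samples) is never predicted, and predicted_class ends up shorter than test_data.classes.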

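You should add 1 to STEP_SIZE_TEST so that the last, partial batch is included. Note that the extra step is only needed when test_data.n is not an exact multiple of the batch size; rounding up instead, e.g. math.ceil(test_data.n / test_data.batch_size), handles both cases. For your data, adding 1 works as follows: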

STEP_SIZE_TEST=test_data.n//test_data.batch_size+1