Why am I getting the Graph disconnected value error while creating a stacking ensemble in Keras?

I am using Tensorflow 2.4 with Keras 2.4.0. I am creating a stacking ensemble using 3 Keras models that are trained for a three-class classification task. I am using a concatenation merge, where a single 9-element vector is created from the three class probabilities predicted by each of the 3 models. This vector feeds a meta-learner consisting of a single hidden layer with 9 neurons and an output layer that makes its own probabilistic prediction. Each model has the same structure as shown below:

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 49, 512)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 49, 512)           2099200   
_________________________________________________________________
batch_normalization_2 (Batch (None, 49, 512)           2048      
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                1605696   
_________________________________________________________________
batch_normalization_3 (Batch (None, 64)                256       
_________________________________________________________________
predictions (Dense)          (None, 3)                 195       
=================================================================

The code is given below:

def model1(model_input):
    """Reload sub-model 1 from disk and wrap it in a fresh Model.

    NOTE(review): the `model_input` argument is never used — the returned
    Model is built on `model1loss.input`, the input tensor baked into the
    saved file. Because each saved model carries its own private input
    tensor, the three wrappers do NOT share an input; this is what later
    produces the Graph-disconnected error.
    """
    model1loss = load_model('model1.h5')
    x = model1loss.output
    model1 = Model(inputs=model1loss.input, 
                   outputs=x, 
                   name='model1')
    return model1
# `model_input` is presumably defined earlier in the notebook — it is
# passed in but ignored (see note above).
model_loss1 = model1(model_input)
model_loss1.summary()

def model2(model_input):
    """Reload sub-model 2 from disk and wrap it in a fresh Model.

    NOTE(review): as with model1, `model_input` is ignored; the wrapper is
    built on the saved file's own input tensor, so it does not share an
    input with the other members.
    """
    model2loss = load_model('model2.h5')
    x = model2loss.output
    model2 = Model(inputs=model2loss.input, 
                   outputs=x, 
                   name='model2')
    return model2
model_loss2 = model2(model_input)
model_loss2.summary()

def model3(model_input):
    """Reload sub-model 3 from disk and wrap it in a fresh Model.

    NOTE(review): as with model1/model2, `model_input` is ignored; the
    wrapper is built on the saved file's own input tensor.
    """
    model3loss = load_model('model3.h5')
    x = model3loss.output
    model3 = Model(inputs=model3loss.input, 
                   outputs=x, 
                   name='model3')
    return model3
model_loss3 = model3(model_input)
model_loss3.summary()

n_models = 3  # number of member models in the ensemble

def load_all_models(n_models):
    """Compile the three module-level wrapper models and return them as a list.

    NOTE(review): the `n_models` parameter is ignored — the function is
    hard-wired to the globals `model_loss1..3`. The `load_weights` calls
    are presumably redundant, since `load_model` above already restored
    the weights from the same .h5 files — confirm before removing.
    """
    all_models = list()
    # `lr`/`decay` are the legacy TF 2.4-era argument spellings for SGD.
    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.95, nesterov=True) 
    model_loss1.load_weights('model1.h5')    
    model_loss1.compile(optimizer=sgd,
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    all_models.append(model_loss1)
    model_loss2.load_weights('model2.h5')
    model_loss2.compile(optimizer=sgd,
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    all_models.append(model_loss2)
    model_loss3.load_weights('model3.h5')
    model_loss3.compile(optimizer=sgd,
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    all_models.append(model_loss3)
    return all_models

# load models
n_members = 3
members = load_all_models(n_members)
print('Loaded %d models' % len(members))


def define_stacked_model(members):
    """Freeze the member models and stack them behind a small meta-learner.

    NOTE(review): this is the version that raises the Graph-disconnected
    ValueError. The loop variable `model` leaks out of the loop, so
    `ensemble_visible` contains only the LAST member's input tensor —
    yet `ensemble_outputs` collects outputs from all three members.
    Outputs 1 and 2 are not reachable from that single input, which is
    exactly what the traceback at the bottom of the question reports.
    """
    for i in range(len(members)):
        model = members[i]
        for layer in model.layers:
            # freeze member layers so only the meta-learner trains
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer._name = 'ensemble_' + str(i+1) + '_' + layer.name
    # define multi-headed input
    ensemble_visible = [model.input]  # BUG: only the last model's input
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in members]
    merge = concatenate(ensemble_outputs)
    hidden = Dense(9, activation='relu')(merge) # three outputs for 3 models, so 9 hidden neurons
    output = Dense(3, activation='softmax')(hidden) # three classes
    model = Model(inputs=ensemble_visible, 
                  outputs=output, 
                  name = 'stacking_ensemble')    
    # compile
    model.compile(loss='categorical_crossentropy', 
                  optimizer='sgd', 
                  metrics=['accuracy'])
    return model

# define ensemble model
stacked_model = define_stacked_model(members)
stacked_model.summary()

On running the code, I get the Graph disconnected error as shown below:

ValueError: Graph disconnected: cannot obtain value for tensor KerasTensor(type_spec=TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'ensemble_2_input_1'") at layer "ensemble_2_block1_conv1". The following previous layers were accessed without issue: ['ensemble_3_block1_conv1']

Answer

Considering that we have a list of pre-trained models that accept the same input and produce outputs of the same dimensionality (2D in our case):

input_dim = 10
# Three stand-in "pretrained" models: each maps a 10-dim input vector to
# 3-class softmax probabilities, so all members share both the input
# shape and the output shape — the precondition for stacking them.
members = [
    Sequential([Dense(3, activation='softmax', input_shape=(input_dim,))]),
    Sequential([Dense(3, activation='softmax', input_shape=(input_dim,))]),
    Sequential([Dense(3, activation='softmax', input_shape=(input_dim,))])
] # we can suppose that they are pretrained

you can merge them together in this simple way:

def define_stacked_model(members):
    """Stack the (frozen) member models behind a small trainable meta-learner.

    Every member is called on one shared Input tensor, so the resulting
    graph has a single entry point and cannot become disconnected.
    """
    shared_input = Input((input_dim,))  # one input tensor feeds every member
    member_outputs = []

    for idx, member in enumerate(members, start=1):
        for layer in member.layers:
            # make not trainable
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer._name = 'ensemble_' + str(idx) + '_' + layer.name
        # call the member on the shared input and collect its output tensor
        member_outputs.append(member(shared_input))

    merged = Concatenate()(member_outputs)
    hidden = Dense(9, activation='relu')(merged)
    output = Dense(3, activation='softmax')(hidden)

    model = Model(inputs=shared_input,
                  outputs=output,
                  name='stacking_ensemble')
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    return model

Define dummy data and fit:

n_samples = 100
# Random features in [0, 1) and random one-hot labels over 3 classes —
# dummy data just to exercise the stacked model end-to-end.
X = np.random.uniform(0,1, (n_samples,input_dim))
y = tf.keras.utils.to_categorical(np.random.randint(0,3, (n_samples,)))

stacked_model = define_stacked_model(members)
stacked_model.fit(X,y, epochs=3)