#StackBounty: #python #tensorflow #keras #loss-function Joint training of multi-output Keras model

Bounty: 50

I’m writing two joint decoders in Keras, with one common input, two separate outputs, and a loss function that takes both outputs into account.

Here is the minimal Keras code that you can regenerate the error:

import tensorflow as tf
from scat import *

from keras.layers import Input, Reshape, Permute, Lambda, Flatten
from keras.layers.core import Dense
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Model
from keras import backend as K

def identity(x):
    return K.identity(x)

# custom loss function
def custom_loss():
    def my_loss(y_dummy, pred):
        fcn_loss_1 = tf.nn.softmax_cross_entropy_with_logits(labels=y_dummy[0], logits=pred[0])
        fcn_loss_2 = tf.nn.softmax_cross_entropy_with_logits(labels=y_dummy[1], logits=pred[1])
        fcn_loss_2 = tf.matrix_band_part(fcn_loss_2, 0, -1) - tf.matrix_band_part(fcn_loss_2, 0, 0)

        fcn_loss = tf.reduce_mean(fcn_loss_1) + 2 * tf.reduce_mean(fcn_loss_2)

        return fcn_loss
    return my_loss

def keras_version():
    input = Input(shape=(135,), name='feature_input')
    out1 = Dense(128, kernel_initializer='glorot_normal', activation='linear')(input)
    out1 = LeakyReLU(alpha=.2)(out1)
    out1 = Dense(256, kernel_initializer='glorot_normal', activation='linear')(out1)
    out1 = LeakyReLU(alpha=.2)(out1)
    out1 = Dense(512, kernel_initializer='glorot_normal', activation='linear')(out1)
    out1 = LeakyReLU(alpha=.2)(out1)
    out1 = Dense(45, kernel_initializer='glorot_normal', activation='linear')(out1)
    out1 = LeakyReLU(alpha=.2)(out1)
    out1 = Reshape((9, 5))(out1)

    out2 = Dense(128, kernel_initializer='glorot_normal', activation='linear')(input)
    out2 = LeakyReLU(alpha=.2)(out2)
    out2 = Dense(256, kernel_initializer='glorot_normal', activation='linear')(out2)
    out2 = LeakyReLU(alpha=.2)(out2)
    out2 = Dense(512, kernel_initializer='glorot_normal', activation='linear')(out2)
    out2 = LeakyReLU(alpha=.2)(out2)
    out2 = Dense(540, kernel_initializer='glorot_normal', activation='linear')(out2)
    out2 = LeakyReLU(alpha=.2)(out2)
    out2 = Reshape((9, 4, 15))(out2)
    out2 = Lambda(lambda x: K.dot(K.permute_dimensions(x, (0, 2, 1, 3)),
                                  K.permute_dimensions(x, (0, 2, 3, 1))), output_shape=(4,9,9))(out2)
    out2 = Flatten()(out2)
    out2 = Dense(324, kernel_initializer='glorot_normal', activation='linear')(out2)
    out2 = LeakyReLU(alpha=.2)(out2)
    out2 = Reshape((4, 9, 9))(out2)
    out2 = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 3, 1)))(out2)

    out1 = Lambda(identity, name='output_1')(out1)
    out2 = Lambda(identity, name='output_2')(out2)

    return Model(input, [out1, out2])

model = keras_version()
model.compile(loss=custom_loss(), optimizer='adam')

model.summary()

feature_final = np.random.normal(0,1,[5000, 9, 15])
train_features_array = np.random.normal(0,1,[5000, 9, 5])
train_adj_array = np.random.normal(0,1,[5000, 9, 9, 4])

feature_final = feature_final.reshape(-1, 135)
model.fit(feature_final, [train_features_array, train_adj_array],
                batch_size=50,
                epochs=10
                )

The error I get is:

File "...", line 135, in <module>
    epochs=10
File ".../keras/engine/training.py", line 1039, in fit
    validation_steps=validation_steps)
File ".../keras/backend/tensorflow_backend.py", line 2675, in _call
    fetched = self._callable_fn(*array_vals)
File ".../tensorflow/python/client/session.py", line 1458, in __call__
    run_metadata_ptr)
tensorflow.python.framework.errors_impl.InvalidArgumentError: input must be at least 2-dim, received shape: [9]
     [[{{node loss/output_1_loss/MatrixBandPart_1}}]]

On a second attempt, I tried writing two loss functions and using loss weights to combine them.

# custom loss function
def custom_loss_1():
    def my_loss_1(y_dummy, pred):
        fcn_loss_1 = tf.nn.softmax_cross_entropy_with_logits(labels=y_dummy[0], logits=pred[0])

        return tf.reduce_mean(fcn_loss_1)
    return my_loss_1

def custom_loss_2():
    def my_loss_2(y_dummy, pred):
        fcn_loss_2 = tf.nn.softmax_cross_entropy_with_logits(labels=y_dummy[1], logits=pred[1])
        fcn_loss_2 = tf.matrix_band_part(fcn_loss_2, 0, -1) - tf.matrix_band_part(fcn_loss_2, 0, 0)

        return tf.reduce_mean(fcn_loss_2)
    return my_loss_2

model.compile(loss={'output_1':custom_loss_1(), 'output_2':custom_loss_2()},
              loss_weights={'output_1':1.0, 'output_2':2.0}, optimizer='adam')

but I received

tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [20,25920], In[1]: [324,324]
     [[{{node dense_9/BiasAdd}}]]

In that case, it might actually be from the model itself. Here is the model.summary:

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
feature_input (InputLayer)      (None, 135)          0                                            
__________________________________________________________________________________________________
dense_5 (Dense)                 (None, 128)          17408       feature_input[0][0]              
__________________________________________________________________________________________________
leaky_re_lu_5 (LeakyReLU)       (None, 128)          0           dense_5[0][0]                    
__________________________________________________________________________________________________
dense_6 (Dense)                 (None, 256)          33024       leaky_re_lu_5[0][0]              
__________________________________________________________________________________________________
leaky_re_lu_6 (LeakyReLU)       (None, 256)          0           dense_6[0][0]                    
__________________________________________________________________________________________________
dense_7 (Dense)                 (None, 512)          131584      leaky_re_lu_6[0][0]              
__________________________________________________________________________________________________
leaky_re_lu_7 (LeakyReLU)       (None, 512)          0           dense_7[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 128)          17408       feature_input[0][0]              
__________________________________________________________________________________________________
dense_8 (Dense)                 (None, 540)          277020      leaky_re_lu_7[0][0]              
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 128)          0           dense_1[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_8 (LeakyReLU)       (None, 540)          0           dense_8[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 256)          33024       leaky_re_lu_1[0][0]              
__________________________________________________________________________________________________
reshape_2 (Reshape)             (None, 9, 4, 15)     0           leaky_re_lu_8[0][0]              
__________________________________________________________________________________________________
leaky_re_lu_2 (LeakyReLU)       (None, 256)          0           dense_2[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 4, 9, 9)      0           reshape_2[0][0]                  
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 512)          131584      leaky_re_lu_2[0][0]              
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 324)          0           lambda_1[0][0]                   
__________________________________________________________________________________________________
leaky_re_lu_3 (LeakyReLU)       (None, 512)          0           dense_3[0][0]                    
__________________________________________________________________________________________________
dense_9 (Dense)                 (None, 324)          105300      flatten_1[0][0]                  
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 45)           23085       leaky_re_lu_3[0][0]              
__________________________________________________________________________________________________
leaky_re_lu_9 (LeakyReLU)       (None, 324)          0           dense_9[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_4 (LeakyReLU)       (None, 45)           0           dense_4[0][0]                    
__________________________________________________________________________________________________
reshape_3 (Reshape)             (None, 4, 9, 9)      0           leaky_re_lu_9[0][0]              
__________________________________________________________________________________________________
reshape_1 (Reshape)             (None, 9, 5)         0           leaky_re_lu_4[0][0]              
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 9, 9, 4)      0           reshape_3[0][0]                  
__________________________________________________________________________________________________
output_1 (Lambda)               (None, 9, 5)         0           reshape_1[0][0]                  
__________________________________________________________________________________________________
output_2 (Lambda)               (None, 9, 9, 4)      0           lambda_2[0][0]                   
==================================================================================================
Total params: 769,437
Trainable params: 769,437
Non-trainable params: 0
__________________________________________________________________________________________________

If you think the model has an issue, please check “model”. This question is different from this question which uses only one output in the loss. Here is also the loss function from the original Tensorflow code:

# -- loss function
Y_1 = tf.placeholder(tf.float32, shape=[None, 9, 9, 4])
Y_2 = tf.placeholder(tf.float32, shape=[None, 9, 5])

loss_1 = tf.nn.softmax_cross_entropy_with_logits(labels=Y_2, logits=fcn(X)[0])
loss_2 = tf.nn.softmax_cross_entropy_with_logits(labels=Y_1, logits=fcn(X)[1])
loss_2 = tf.matrix_band_part(loss_2, 0, -1) - tf.matrix_band_part(loss_2, 0, 0)

loss = tf.reduce_mean(loss_1) + 2 * tf.reduce_mean(loss_2)


Get this bounty!!!

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.