#StackBounty: #tensorflow #keras #deep-learning #nlp #lstm Seq2Seq model return same vector for all sentences

Bounty: 50

I’m trying to build an abstractive text summarization model. I’m using word2vec for the embeddings, a 2-layer bi-LSTM encoder and a 1-layer bi-LSTM decoder, plus attention. After training, the model always returns the same vector for every input sentence. How can I fix this problem?

Training Code

latent_dim = 185
embedding_dim=128

encoder_inputs = Input(shape=(int(art_max_length),))

#embedding layer
enc_emb=Embedding(input_vocab_size+1,embedding_dim, weights=[x_emb_matrix_reduce],trainable=False)(encoder_inputs)

#encoder lstm 1
encoder_bi_lstm1 = Bidirectional(LSTM(latent_dim,
                                   return_sequences=True,
                                   return_state=True,
                                   dropout=0.4,
                                   recurrent_dropout=0.4), 
                                 merge_mode="concat")
encoder_output1, forward_state_h1, forward_state_c1, backward_state_h1, backward_state_c1 = encoder_bi_lstm1(enc_emb)
encoder_states1 = [forward_state_h1, forward_state_c1, backward_state_h1, backward_state_c1]

#encoder lstm 2
encoder_bi_lstm2 = Bidirectional(LSTM(latent_dim,
                                   return_sequences=True,
                                   return_state=True,
                                   dropout=0.4,
                                   recurrent_dropout=0.4), 
                                 merge_mode="concat")
encoder_output2, forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2 = encoder_bi_lstm2(encoder_output1)
encoder_states2 = [forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))

#embedding layer
dec_emb_layer = Embedding(output_vocab_size+1, embedding_dim, weights=[y_emb_matrix_reduce], trainable=False)
dec_emb = dec_emb_layer(decoder_inputs)

decoder_bi_lstm = Bidirectional(LSTM(latent_dim, 
                                  return_sequences=True, 
                                  return_state=True,
                                  dropout=0.4,
                                  recurrent_dropout=0.2),
                                 merge_mode="concat")
decoder_outputs, decoder_fwd_state_h1, decoder_fwd_state_c1, decoder_back_state_h1, decoder_back_state_c1 = decoder_bi_lstm(dec_emb,initial_state=encoder_states2)
decoder_states = [decoder_fwd_state_h1, decoder_fwd_state_c1, decoder_back_state_h1, decoder_back_state_c1]

# Attention layer
attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_output2, decoder_outputs])

# Concat attention input and decoder LSTM output
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

epochs = 75
batch_size = 3
learning_rate = 0.001
initial_accumulator_value = 0.1
name = 'Adagrad'
clipnorm = 1.0

opt = Adagrad(learning_rate=learning_rate, initial_accumulator_value=initial_accumulator_value, name=name, clipnorm=clipnorm)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy')
es = EarlyStopping(monitor='val_loss', mode='auto', verbose=1,patience=10)
history=model.fit(x_tr, y_tr, epochs=epochs, callbacks=[es], steps_per_epoch=250, validation_steps=10, batch_size=batch_size, validation_data=(x_val,y_val))
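
Note that decoder_dense, which appears later in the inference code, is never defined in this training code, and the training Model is built on the raw decoder_outputs rather than on decoder_concat_input. A hedged sketch of what the missing projection head might look like (the TimeDistributed/Dense choice and the variable names are assumptions, not from the original post):

from tensorflow.keras.layers import Dense, TimeDistributed

# Hypothetical reconstruction of the undefined decoder_dense: a softmax
# projection over the target vocabulary, applied per timestep to the
# attention-augmented decoder output.
decoder_dense = TimeDistributed(Dense(output_vocab_size + 1, activation='softmax'))
decoder_dense_outputs = decoder_dense(decoder_concat_input)

model = Model([encoder_inputs, decoder_inputs], decoder_dense_outputs)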

Inference Code

reverse_target_word_index = y_tokenizer.index_word
reverse_source_word_index = x_tokenizer.index_word
target_word_index = y_tokenizer.word_index

# Encode the input sequence to get the feature vector
encoder_model = Model(inputs=encoder_inputs, outputs=[encoder_output2, forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2])

# Decoder setup
# Below tensors will hold the states of the previous time step
decoder_state_input_h_fwd = Input(shape=(latent_dim,))
decoder_state_input_h_bwd = Input(shape=(latent_dim,))

decoder_state_input_c_fwd = Input(shape=(latent_dim,))
decoder_state_input_c_bwd = Input(shape=(latent_dim,))

decoder_hidden_state_input = Input(shape=(art_max_length,latent_dim*2))

# Get the embeddings of the decoder sequence
dec_emb2= dec_emb_layer(decoder_inputs)

# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_outputs2, decoder_fwd_state_h2, decoder_fwd_state_c2, decoder_back_state_h2, decoder_back_state_c2 = decoder_bi_lstm(dec_emb2, initial_state=[decoder_state_input_h_fwd, decoder_state_input_h_bwd, decoder_state_input_c_fwd, decoder_state_input_c_bwd])
decoder_states2 = [decoder_fwd_state_h2, decoder_fwd_state_c2, decoder_back_state_h2, decoder_back_state_c2]

#attention inference
attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_input, decoder_outputs2])
decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

# A dense softmax layer to generate prob dist. over the target vocabulary
decoder_outputs2 = decoder_dense(decoder_inf_concat)

# Final decoder model
decoder_model = Model(
    [decoder_inputs] + [decoder_hidden_state_input, decoder_state_input_h_fwd, decoder_state_input_h_bwd, decoder_state_input_c_fwd, decoder_state_input_c_bwd],
    [decoder_outputs2] + decoder_states2)

Code to generate summary

def seq2summary(input_seq):
    newString=''
    for i in input_seq:
        if((i[0]!=0) and (i[0]!=target_word_index['sostok']) and (i[0]!=target_word_index['eostok'])):
            newString=newString+reverse_target_word_index[i[0]]+' '
    return newString

def seq2text(input_seq):
    newString=''
    for i in input_seq:
        if(i!=0):
            newString=newString+reverse_source_word_index[i]+' '
    return newString

def decode_sequence(input_seq):
    e_out, e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd = encoder_model.predict(input_seq)
    
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1,1))

    # Populate the first word of target sequence with the start word.
    target_seq[0, 0] = target_word_index['sostok']

    stop_condition = False
    decoded_sentence = ''
    
    while not stop_condition:
        output_tokens, h_fwd, c_fwd, h_bwd, c_bwd = decoder_model.predict([target_seq] + [e_out, e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd])

        # NOTE: this early return exits on the first iteration, so only the
        # raw softmax vector for the first decoding step is ever returned
        # and the sampling loop below is unreachable.
        return output_tokens[0, -1, :]
        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        
        sampled_token = reverse_target_word_index[sampled_token_index]

        if(sampled_token!='eostok'):
            decoded_sentence += ' '+sampled_token

        # Exit condition: either hit max length or find stop word.
        if (sampled_token == 'eostok'  or len(decoded_sentence.split()) >= (high_max_length-1)):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index

        # Update internal states 
        e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd = h_fwd, c_fwd, h_bwd, c_bwd
    
    return decoded_sentence


Get this bounty!!!

#StackBounty: #amazon-web-services #tensorflow #python-3.9 AWS Deep Learning AMI with Python3.9

Bounty: 50

I tried using the official AWS Deep Learning AMI.
It is published here: https://aws.amazon.com/marketplace/pp/prodview-d5wlsowr2cimk
(currently version 49.0)

My problem is that it uses Python 3.7 while my code uses Python 3.9.

I’m wondering what I should do. I can upgrade the Python on the machine to 3.9, but this will obviously require reinstalling TensorFlow and other libraries, and I wonder if I will break the optimizations that come by default on this image.

I also couldn’t find any other official images with Python 3.9, GPU support, TensorFlow, OpenCV and the rest.


Get this bounty!!!

#StackBounty: #tensorflow #lstm #recurrent-neural-network #functional-api Correct way to use custom rnn cells with multiple inputs

Bounty: 100

I’m trying to create a custom recurrent neural network cell that accepts 2 tensors as input. The RNN layer is then linked to a dense layer. The problem occurs when I use the second input to calculate the output: if I do, I get the error

InvalidArgumentError:  Trying to access element 10 in a list with 10 elements.
     [[{{node my_model/rnn/while/body/_1/my_model/rnn/while/TensorArrayV2Read_1/TensorListGetItem}}]] [Op:__inference_train_function_972]

whereas when I don’t use it, training continues without any errors. How should I proceed?

class MyCell(keras.layers.Layer):
    def __init__(self, **kwargs):
        #other stuff
        
        self.scaling = 1.
        self.use_y = True
        super().__init__(**kwargs)

    def build(self, input_shape):
        #other stuff
        
        self.W_y = tf.random.uniform(shape=[input_shape[1][-1], self.units], minval=-1, maxval=1) *self.scaling
        self.bias = tf.random.uniform(shape=(self.units,), minval=-1, maxval=1, name="bias") * self.scaling

        self.built = True
        
    def call(self, inputs, states):
        prev_output = states[0]
        w_in = tf.matmul(inputs[0], self.kernel)
        w_rnn = tf.matmul(prev_output, self.rnn_kernel)
        
        if self.use_y:
            # if I calculate the output this way I get the error during fit; the build method itself works fine
            y_part = tf.matmul(inputs[1], self.W_y)
            output = prev_output + tf.nn.tanh(w_in + self.bias + w_rnn + y_part)
            print("inside call method")
            print("w_in shape", self.w_in.shape)
            print("w_bias shape", self.bias.shape)
            print("w_rnn shape", self.w_rnn.shape)
            print("y_part shape", y_part.shape)
        else:
            # if I calculate the output this way I get no error and the fit continues
            output = prev_output + tf.nn.tanh(w_in + self.bias + w_rnn)
        return output, [output]


class MyModel(keras.Model):
    def __init__(self, units=100,
                 x_train=None,
                 y_train=None,
                 **kwargs):
        super().__init__(**kwargs)
        
        #other stuff
        
        
        self.units = units
        
        self.x_input = tf.keras.layers.Input(shape=x_train.shape, name="x_train")
        self.y_input = tf.keras.layers.Input(shape=y_train.shape, name="y_train_teacher")
        self.rnn = tf.keras.layers.RNN(cell=MyCell())
        
    

        self.out = Sequential()
        self.out.add(tf.keras.layers.Dense(10))
        self.out.compile(loss="mse", optimizer="adam")

    def call(self, inputs):
        input_x = inputs[0]
        input_y = inputs[1]
        r = self.rnn((input_x, input_y))
        y = self.out(r)
        return y

Now I call the model

x_train, x_valid, y_train, y_valid = train_test_split(X_train,y_train,test_size=0.2,random_state=67)

#x_train.shape -> [2500, 250, 25][n_samples, n_timestep, n_features]
#y_train.shape -> [2500, 1][n_samples, n_timestep] 

# add a dimension since the LSTM wants 3D arrays
y_train_t = y_train.reshape(y_train.shape[0], y_train.shape[1], 1)
y_valid_t = y_valid.reshape(y_valid.shape[0], y_valid.shape[1], 1) 
  
model = MyModel(units=100,
                x_train = x_train,
                y_train = y_train_t,
                )
  
model.build(input_shape=[(x_train.shape), (y_train_t.shape)])

model.compile(loss="mse", optimizer='adam')
history = model.fit((x_train,y_train_t),
        y_train,
        validation_data=((x_valid, y_valid_t), y_valid),
        epochs=10,
        verbose=1)

Shapes inside call method

inside call method
w_in shape (25, 300)
w_bias shape (300,)
w_rnn shape (300, 300)
y_part shape (2500, 300)
inside call method
w_in shape (25, 300)
w_bias shape (300,)
w_rnn shape (300, 300)
y_part shape (2500, 300)
Epoch 1/10
inside call method
w_in shape (25, 300)
w_bias shape (300,)
w_rnn shape (300, 300)
y_part shape (None, 300)
inside call method
w_in shape (25, 300)
w_bias shape (300,)
w_rnn shape (300, 300)
y_part shape (None, 300)
inside call method
w_in shape (25, 300)
w_bias shape (300,)
w_rnn shape (300, 300)
y_part shape (None, 300)
inside call method
w_in shape (25, 300)
w_bias shape (300,)
w_rnn shape (300, 300)
y_part shape (None, 300)

# and then the InvalidArgumentError
# I noticed it is sometimes "Trying to access element 10 in a list with 10 elements",
# sometimes "Trying to access element 31 in a list with 10 elements"

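For what it’s worth, a hedged sketch of one guess at the cause: tf.keras.layers.RNN unstacks every nested input along the time axis, so the 250-step x and the 1-step y may need to agree on the number of timesteps before being fed in together (the tf.repeat approach and the variable names below are assumptions, not from the original post):

import tensorflow as tf

# Tile y across time so both nested inputs share the 250-step time axis.
n_timesteps = x_train.shape[1]                           # 250
y_train_rep = tf.repeat(y_train_t, n_timesteps, axis=1)  # (2500, 250, 1)
y_valid_rep = tf.repeat(y_valid_t, n_timesteps, axis=1)

history = model.fit((x_train, y_train_rep), y_train,
                    validation_data=((x_valid, y_valid_rep), y_valid),
                    epochs=10, verbose=1)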

Get this bounty!!!

#StackBounty: #python #tensorflow #keras Where does keras actually initialize the dataset?

Bounty: 50

I’m trying to understand the implementation of SGD in tensorflow.

In the constructor (__init__ method) of class TensorLikeDataAdapter, self._dataset is initialized by this line

https://github.com/tensorflow/tensorflow/blob/r2.5/tensorflow/python/keras/engine/data_adapter.py#L346

self._dataset = dataset

I tried to print the value out with this line

print('enumerate_epochs self._dataset', list(self._dataset))

and I got

<_OptionsDataset shapes: ((None, 2), (None,)), types: (tf.float32, tf.float32)>

which seems to indicate that the dataset hasn’t yet been actually loaded.

At the very begining of the enumerate_epochs method

https://github.com/tensorflow/tensorflow/blob/r2.5/tensorflow/python/keras/engine/data_adapter.py#L1196

I added this line

def enumerate_epochs(self):
    print('enumerate_epochs self._dataset', list(self._dataset))

and I got 3 copies (I set epochs=3) of the actual dataset, which means the dataset has been initialized and randomized somewhere before this point.

I went through the whole data_adapter.py but failed to locate where the dataset is actually initialized.

Highlight

I also tried this line

  print('data_handler._dataset', data_handler._dataset)
  for epoch, iterator in data_handler.enumerate_epochs():

and I got

data_handler._dataset <_OptionsDataset shapes: ((None, 2), (None,)), types: (tf.float32, tf.float32)>

However, this line

def _truncate_execution_to_epoch(self):
    print('_truncate_execution_to_epoch self._dataset', list(self._dataset))

gives 3 copies (epochs=3) of the actual dataset, which means the dataset is actually initialized somewhere in between, though I can’t see where that could be!

I also tried class DataHandler

print('DataHandler self._dataset', list(self._dataset))
self._configure_dataset_and_inferred_steps(strategy, x, steps_per_epoch,
                                           class_weight, distribute)

and I got this error

AttributeError: 'DataHandler' object has no attribute '_dataset'

Could someone help me to see the light at the end of the tunnel.
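
For context, a standalone sketch (not from the original post): tf.data pipelines are lazy, so there may be no single line where the dataset is "initialized"; printing the object shows only its structure, while iterating it is what actually materializes elements.

import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices([[1., 2.], [3., 4.]])
print(ds)        # prints only the structure (shapes and dtypes), no data
print(list(ds))  # iterating is what actually materializes the elements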


Get this bounty!!!

#StackBounty: #python #tensorflow #machine-learning #neural-network #tf.keras TensorFlow/Keras Using specific class recall as metric fo…

Bounty: 100

*Update at bottom

I am trying to use recall on 2 of 3 classes as a metric, i.e. classes B and C out of classes A, B, C.

(The original motivation is that my model is highly imbalanced across the classes [~90% is class A], so when I use accuracy I get ~90% by predicting class A every time.)

model.compile(
              loss='sparse_categorical_crossentropy', #or categorical_crossentropy
              optimizer=opt,
              metrics=[tf.keras.metrics.Recall(class_id=1, name='recall_1'),tf.keras.metrics.Recall(class_id=2, name='recall_2')]
              )

history = model.fit(train_x, train_y, batch_size=BATCH, epochs=EPOCHS, validation_data=(validation_x, validation_y), callbacks=[tensorboard, checkpoint])

This spits out an error:

raise ValueError("Shapes %s and %s are incompatible" % (self, other))

ValueError: Shapes (None, 3) and (None, 1) are incompatible

Model summary is:

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 120, 32)           19328
_________________________________________________________________
dropout (Dropout)            (None, 120, 32)           0
_________________________________________________________________
batch_normalization (BatchNo (None, 120, 32)           128
_________________________________________________________________
lstm_1 (LSTM)                (None, 120, 32)           8320
_________________________________________________________________
dropout_1 (Dropout)          (None, 120, 32)           0
_________________________________________________________________
batch_normalization_1 (Batch (None, 120, 32)           128
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                8320
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0
_________________________________________________________________
batch_normalization_2 (Batch (None, 32)                128
_________________________________________________________________
dense (Dense)                (None, 32)                1056
_________________________________________________________________
dropout_3 (Dropout)          (None, 32)                0
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 99
=================================================================
Total params: 37,507
Trainable params: 37,315
Non-trainable params: 192

Note that the model works fine without the errors if using:

metrics=['accuracy']

but this and this made me think something has not been implemented along the lines of tf.metrics.SparseCategoricalRecall()

from

tf.metrics.SparseCategoricalAccuracy()


So I diverted to a custom metric, which descended into a rabbit hole of other issues, as I am highly illiterate when it comes to classes and decorators.

I botched this together from a custom metric example (I have no idea how to use sample_weight, so I commented it out to come back to later):

from sklearn.metrics import classification_report

class RelevantRecall(tf.keras.metrics.Metric):

    def __init__(self, name="Relevant_Recall", **kwargs):
        super(RelevantRecall, self).__init__(name=name, **kwargs)
        self.joined_recall = self.add_weight(name="B/C Recall", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.argmax(y_pred, axis=1)
        report_dictionary = classification_report(y_true, y_pred, output_dict = True)

        # if sample_weight is not None:
        #     sample_weight = tf.cast(sample_weight, "float32")
        #     values = tf.multiply(values, sample_weight)
        # self.joined_recall.assign_add(tf.reduce_sum(values))

        self.joined_recall.assign_add((float(report_dictionary['1.0']['recall'])+float(report_dictionary['2.0']['recall']))/2)
 
    def result(self):
        return self.joined_recall

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.joined_recall.assign(0.0)


model.compile(
              loss='sparse_categorical_crossentropy', #or categorical_crossentropy
              optimizer=opt,
              metrics=[RelevantRecall()]
              )


history = model.fit(train_x, train_y, batch_size=BATCH, epochs=EPOCHS, validation_data=(validation_x, validation_y), callbacks=[tensorboard, checkpoint])

The aim is to return a metric of (recall(B) + recall(C)) / 2. I’d imagine returning both recalls separately, like metrics=[recall(B), recall(C)], would be better, but I can’t get the former to work anyway.

I got a tensor bool error: OperatorNotAllowedInGraphError: using a 'tf.Tensor' as a Python 'bool' is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature. Googling this led me to add @tf.function above my custom metric class.

This led to an old-style vs. new-style class type error:

super(RelevantRecall, self).__init__(name=name, **kwargs)
TypeError: super() argument 1 must be type, not Function

which I didn’t see how I had caused, since the class does inherit from an object?

As I said, I’m quite new to all aspects of this, so any help on how to achieve (and how best to achieve) a metric over only a subset of the prediction classes would be really appreciated.

OR

if I am going about this entirely wrong, please let me know or point me to the correct resource.

Ideally I’d like to go with the former method of using tf.keras.metrics.Recall(class_id=1..., as it seems the neatest way, if only it worked.

I am able to get the recall for each class when using a similar function in the callbacks part of the model, but this seems more computationally intensive, as I have to run model.predict on the val/test data at the end of each epoch.
It is also unclear whether this even tells the model to focus on improving the selected classes (i.e. the difference between implementing it as a metric vs. as a callback).


Callback code:

from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import Callback

class MetricsCallback(Callback):
    def __init__(self, test_data, y_true):
        # Should be the label encoding of your classes
        self.y_true = y_true
        self.test_data = test_data

    def on_epoch_end(self, epoch, logs=None):
        # Here we get the probabilities - longer process
        y_pred = self.model.predict(self.test_data)

        # Here we get the actual classes
        y_pred = tf.argmax(y_pred,axis=1)
        report_dictionary = classification_report(self.y_true, y_pred, output_dict = True)
        print ("n")
  
        print (f"Accuracy: {report_dictionary['accuracy']} - Holds: {report_dictionary['0.0']['recall']} - Sells: {report_dictionary['1.0']['recall']} - Buys: {report_dictionary['2.0']['recall']}")
        self._data = (float(report_dictionary['1.0']['recall'])+float(report_dictionary['2.0']['recall']))/2
        return

metrics_callback = MetricsCallback(test_data = validation_x, y_true = validation_y)

history = model.fit(train_x, train_y, batch_size=BATCH, epochs=EPOCHS, validation_data=(validation_x, validation_y), callbacks=[tensorboard, checkpoint, metrics_callback])

Update 19/07/2021

  • I have resorted to using categorical_crossentropy for loss instead of sparse_categorical_crossentropy.
  • One-hot-encoding my class/target arrays.
  • Using TF recall: tf.keras.metrics.Recall(class_id=1, name='recall_1')

I am now using the code below.

train_y = tf.one_hot(train_y, 3)
validation_y = tf.one_hot(validation_y, 3)
test_y = tf.one_hot(test_y, 3)

model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=[tf.keras.metrics.Recall(class_id=1, name='No'),tf.keras.metrics.Recall(class_id=2, name='Yes')]
    ) #tf.keras.metrics.Recall(class_id=0, name='Wait')

history = model.fit(train_x, train_y, batch_size=BATCH, epochs=EPOCHS, validation_data=(validation_x, validation_y), callbacks=[tensorboard, checkpoint])

Thanks to Abhishek Prajapat

This achieves the same overall goal, and with a small number of mutually exclusive classes it probably makes very little difference to performance,

but in the case of a very large number of mutually exclusive classes I still don’t have a solution for achieving the same goal as above using sparse_categorical_crossentropy.

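For the record, a hedged sketch of one possible route (untested; SparseRecall and its num_classes argument are my own names, not a TensorFlow API): subclass the stock Recall and one-hot encode the sparse labels on the fly, so class_id keeps working with sparse_categorical_crossentropy.

import tensorflow as tf

class SparseRecall(tf.keras.metrics.Recall):
    """Recall for one class when y_true holds sparse integer labels."""

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes

    def update_state(self, y_true, y_pred, sample_weight=None):
        # One-hot encode the sparse labels so the parent class (which
        # expects one-hot targets when class_id is set) can be reused.
        y_true = tf.one_hot(tf.cast(tf.reshape(y_true, [-1]), tf.int32),
                            self.num_classes)
        return super().update_state(y_true, y_pred, sample_weight)

# Hypothetical usage with sparse labels:
# metrics=[SparseRecall(3, class_id=1, name='No'),
#          SparseRecall(3, class_id=2, name='Yes')]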

Get this bounty!!!

#StackBounty: #tensorflow #machine-learning #neural-network #lstm #recurrent-neural-network Keras LSTM input ValueError: Shapes are inc…

Bounty: 50

I’m not sure why I’m getting an error with my LSTM neural network. It seems to be related to the input shape.

This is my neural network architecture:

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

model = Sequential()

# Recurrent layer
model.add(LSTM(64, return_sequences=False, 
           dropout=0.1, recurrent_dropout=0.1))

# Fully connected layer
model.add(Dense(64, activation='relu'))

# Dropout for regularization
model.add(Dropout(0.5))

# Output layer
model.add(Dense(y_train.nunique(), activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

This is how I train it:

history = model.fit(X_train_padded, y_train_padded, 
                    batch_size=2048, epochs=150,
                    validation_data=(X_test_padded, y_test_padded))

This is the shape of my input data:

print(X_train_padded.shape, X_test_padded.shape, y_train_padded.shape, y_test_padded.shape)
(98, 20196, 30) (98, 4935, 30) (98, 20196, 1) (98, 4935, 1)

This is part of my X_train_padded:

X_train_padded
array([[[ 2.60352379e-01, -1.66420518e-01, -3.12893162e-01, ...,
         -1.51210476e-01, -3.56188897e-01, -1.02761131e-01],
        [ 1.26103191e+00, -1.66989382e-01, -3.13025807e-01, ...,
          6.61329839e+00, -3.56188897e-01, -1.02761131e-01],
        [ 1.04418243e+00, -1.66840157e-01, -3.12994596e-01, ...,
         -1.51210476e-01, -3.56188897e-01, -1.02761131e-01],
        ...,
        [ 1.27399408e+00, -1.66998426e-01, -3.13025807e-01, ...,
          6.61329839e+00, -3.56188897e-01, -1.02761131e-01],

This is the error that I’m getting:

Epoch 1/150
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-52-52422b54faa4> in <module>
----> 1 history = model.fit(X_train_padded, y_train_padded, 
      2                     batch_size=2048, epochs=150,
      3                     validation_data=(X_test_padded, y_test_padded))
...
ValueError: Shapes (None, 20196) and (None, 12) are incompatible

As I’m using an LSTM layer, I have a 3D input shape. My output layer has 12 nodes (y_train.nunique()) because I have 12 different classes in my input. Given that I have 12 classes, I’m using softmax as the activation function in my output layer and categorical_crossentropy as my loss function.

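For reference, a self-contained toy sketch (sizes are arbitrary assumptions, not the asker’s data) of the shape contract categorical_crossentropy expects when return_sequences=False:

import numpy as np
import tensorflow as tf

# With return_sequences=False the LSTM emits one vector per sample, so
# the target must be one-hot with shape (batch, n_classes).
x = np.random.random((4, 30, 5)).astype("float32")        # (batch, timesteps, features)
y = tf.one_hot(np.random.randint(0, 12, size=(4,)), 12)   # (batch, 12)

model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=False),
    tf.keras.layers.Dense(12, activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy")
model.fit(x, y, epochs=1, verbose=0)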

Get this bounty!!!

#StackBounty: #python #tensorflow #keras #neural-network #recurrent-neural-network Setting the initial state of an RNN represented as a…

Bounty: 50

How do I set the initial state of the recurrent neural network rnn constructed below?

from tensorflow.keras.layers import Dense, SimpleRNN
from tensorflow.keras.models import Sequential

rnn = Sequential([SimpleRNN(3), Dense(1)])

I’d like to specify the initial state of the first layer before fitting the model with model.fit.
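
For comparison, a hedged sketch using the functional API, where the layer call accepts an initial_state argument (the shapes and the zero initial state below are assumptions):

import numpy as np
import tensorflow as tf

seq_in = tf.keras.Input(shape=(None, 5))   # (timesteps, features)
state_in = tf.keras.Input(shape=(3,))      # SimpleRNN keeps a single state vector
h = tf.keras.layers.SimpleRNN(3)(seq_in, initial_state=[state_in])
out = tf.keras.layers.Dense(1)(h)
rnn = tf.keras.Model([seq_in, state_in], out)

# The initial state is now an ordinary model input, fed at fit time.
x = np.random.random((4, 7, 5)).astype("float32")
s0 = np.zeros((4, 3), dtype="float32")
y = np.random.random((4, 1)).astype("float32")
rnn.compile(optimizer="adam", loss="mse")
rnn.fit([x, s0], y, epochs=1, verbose=0)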


Get this bounty!!!

#StackBounty: #tensorflow #distributed Distributed training with low level Tensorflow API

Bounty: 100

I am working on low-level TensorFlow code for my model training. By low level I mean that I define the tf.Session() object for the graph and evaluate the graph within this session.

I would like to distribute the model training using tf.distribute.MirroredStrategy().

I am able to use MirroredStrategy() with the TensorFlow sequential API, using the example shared by TensorFlow in their documentation.

But I am having difficulty executing low-level TF code with the mirrored strategy.

I tried to use tf.distribute.MirroredStrategy(), and below is the resulting resource utilization:

[0] GeForce RTX 2080 Ti | 48'C, 40 % | 10771 / 11019 MB | vipin(10763M) gdm(4M)
[1] GeForce RTX 2080 Ti | 37'C,  0 % | 10376 / 11014 MB | vipin(10327M) gdm(36M) gdm(8M)

Even though the model used memory on both GPUs, GPU 1’s utilization is still 0%.

I am not sure what the issue is, or even whether TensorFlow supports this.

Please clear up my doubts and, if possible, share sample code as well.

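For what it’s worth, a hedged sketch of the route TF1-era graph code usually took: wiring MirroredStrategy into tf.estimator rather than into a raw Session (model_fn and input_fn are placeholders for the asker’s own graph-building code):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
config = tf.estimator.RunConfig(train_distribute=strategy)
estimator = tf.estimator.Estimator(model_fn=model_fn, config=config)
estimator.train(input_fn=input_fn, steps=1000)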

Get this bounty!!!

#StackBounty: #neural-networks #tensorflow #keras #wavelet Why does the residual 1×1 conv in wavenet not have an activation?

Bounty: 50

I have been trying to implement a wavenet. From the papers and designs I have looked at on github I have come up with the following…

# is_last(seq) is a helper (not shown) that yields (is_last_element, value) pairs
for i, (last, d) in enumerate(is_last([1, 2, 4, 8, 16, 32, 64, 128, 256] * 4)):
    h = layers.Conv1D(64, 2, dilation_rate = d, padding = 'causal', activation = 'tanh', name = 'h_%d' % i)(r)
    t = layers.Conv1D(64, 2, dilation_rate = d, padding = 'causal', activation = 'sigmoid', name = 't_%d' % i)(r)
    x = h * t
    s = s + layers.Conv1D(256, 1, name = 's_%d' % i)(x)
    if not last:
        r = r + layers.Conv1D(64, 1, name = 'r_%d' % i)(x)

In this code block h and t are the dilated/gated convolutions. The s variable is my skip connection which will eventually have a relu applied to it before the post processing layers. The r variable is my residual connection which is fed into the next layer. What I don’t understand is why the convolution that is added to r does not have an activation function. I know having two linear layers in a row can just be simplified to a single linear layer. Am I missing something here? What is the point of having a linear convolution?
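
A quick standalone check of that premise (my own sketch, not from the original post): a 1x1 Conv1D is a per-timestep linear map, so two stacked linear 1x1 convolutions fold into a single one.

import numpy as np
import tensorflow as tf

x = np.random.random((1, 8, 4)).astype("float32")
a = tf.keras.layers.Conv1D(6, 1, use_bias=False)   # linear: no activation
b = tf.keras.layers.Conv1D(5, 1, use_bias=False)
y = b(a(x))

# Fold the two kernels into one linear map and compare.
w = a.kernel.numpy()[0] @ b.kernel.numpy()[0]      # (4, 6) @ (6, 5) -> (4, 5)
print(np.allclose(y.numpy(), x @ w, atol=1e-5))    # True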


Get this bounty!!!

#StackBounty: #python #tensorflow #keras How to monitor accuracy with CTC loss function and Datasets? (runnable code included)

Bounty: 50

I’ve been trying to speed up training of my CRNN network for optical character recognition, but I can’t get the accuracy metric working when using TFRecords and tf.data.Dataset pipelines. I previously used a Keras Sequence and had it working. Here is a complete runnable toy example showing my problem (tested with Tensorflow 2.4.1):

import random
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.python.keras import Input, Model
from tensorflow.python.keras.layers import Dense, Layer, Bidirectional, GRU, Reshape, Activation
from tensorflow.python.keras.optimizer_v2.adam import Adam

AUTOTUNE = tf.data.experimental.AUTOTUNE
CHAR_VECTOR = "ABC"
IMG_W = 10
IMG_H = 10
N_CHANNELS = 3


class CTCLayer(Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = K.ctc_batch_cost

    def call(self, y_true, y_pred, label_length):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions
        return y_pred


def get_model():
    n_classes = len(CHAR_VECTOR) + 1

    input = Input(name='image', shape=(IMG_W, IMG_H, N_CHANNELS), dtype='float32')
    label = Input(name='label', shape=[None], dtype='float32')
    label_length = Input(name='label_length', shape=[None], dtype='int64')

    x = Reshape(target_shape=(IMG_W, np.prod(input.shape[2:])), name='reshape')(input)
    x = Dense(24, activation='relu', name='dense1')(x)
    x = Bidirectional(GRU(24, return_sequences=True, name="GRU"), merge_mode="sum")(x)
    x = Dense(n_classes, name='dense2')(x)
    y_pred = Activation('softmax', name='softmax')(x)

    output = CTCLayer(name="ctc")(label, y_pred, label_length)

    m = Model(inputs=[input, label, label_length], outputs=output)
    return m


def image_feature(value):
    """Returns a bytes_list from a string / byte."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[tf.io.encode_jpeg(value).numpy()]))


def float_feature_list(value):
    """Returns a list of float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def create_example(image, label, label_length):
    feature = {
        "image": image_feature(image),
        "label": float_feature_list(label),
        "label_length": int64_feature(label_length),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))


def parse_tfrecord_fn(example):
    feature_description = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "label": tf.io.VarLenFeature(tf.float32),
        "label_length": tf.io.FixedLenFeature([1], tf.int64),
    }
    example = tf.io.parse_single_example(example, feature_description)
    example["image"] = tf.image.convert_image_dtype(tf.io.decode_jpeg(example["image"], channels=3), dtype="float32")
    example["label"] = tf.sparse.to_dense(example["label"])

    return example


def generate_tfrecords(n):
    with tf.io.TFRecordWriter(filename) as writer:
        for i in range(n):
            random_img = np.random.random((IMG_W, IMG_H, N_CHANNELS))
            label_length = random.randint(1, max_text_len)
            label = np.random.randint(0, len(CHAR_VECTOR), max_text_len)
            example = create_example(random_img, label, label_length)
            writer.write(example.SerializeToString())


class DataGenerator(tf.keras.utils.Sequence):
    def __len__(self):
        return steps_per_epoch

    def __getitem__(self, index):
        outputs = np.zeros([batch_size])
        dataset = get_dataset()
        inputs = next(iter(dataset.take(1)))
        return inputs, outputs


def get_dataset():
    generate_tfrecords(batch_size * epochs * steps_per_epoch)
    dataset = (
        tf.data.TFRecordDataset(filename, num_parallel_reads=AUTOTUNE)
        .map(parse_tfrecord_fn, num_parallel_calls=AUTOTUNE)
        .batch(batch_size)
        .prefetch(AUTOTUNE)
    )
    return dataset


if __name__ == "__main__":
    batch_size = 9
    epochs = 7
    steps_per_epoch = 8
    max_text_len = 5
    filename = "test.tfrec"
    use_generator = False
    data = DataGenerator() if use_generator else get_dataset()

    model = get_model()
    '''This fails when use_generator == False, removing the 
     metric solves it'''
    model.compile(optimizer=Adam(), metrics=["accuracy"])
    model.fit(data, epochs=epochs, steps_per_epoch=steps_per_epoch)

Set use_generator = True or remove metrics=["accuracy"] and it will run without error.

As you can see the DataGenerator uses the same data from the TFRecords, but it also returns some zeros, and for whatever reason this seems to be the magic sauce:

class DataGenerator(tf.keras.utils.Sequence):
    def __len__(self):
        return steps_per_epoch

    def __getitem__(self, index):
        outputs = np.zeros([batch_size])
        dataset = get_dataset()
        inputs = next(iter(dataset.take(1)))
        return inputs, outputs

I also noticed that this Keras example suffers from the same problem (it crashes if you edit the code to monitor accuracy): https://keras.io/examples/vision/captcha_ocr/

Is there any way to mimic the behaviour of __getitem__ with the Dataset, or some other way of getting the accuracy without using a Sequence?
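
One hedged way to mimic the Sequence behaviour with the Dataset itself (a sketch; whether "accuracy" against all-zero dummy targets is meaningful is a separate question) is to map a dummy target onto each batch:

def add_dummy_targets(batch):
    # Mirror the Sequence's (inputs, zeros) pairs so Keras has a target
    # tensor for the metric (the CTC loss is added inside CTCLayer and
    # ignores these targets).
    return batch, tf.zeros([tf.shape(batch["label"])[0]])

data = get_dataset().map(add_dummy_targets)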


Get this bounty!!!