*Bounty: 50*

*Bounty: 50*

I’m trying to generate an abstractive text summarization. I’m using word2vec for embedding and bi-lstm with 2 layer in encoder and bi-lstm with 1 layer in decoder, and also I’m using Attention. I trained the model and it always return same vector for all sentences of input. How can I fix this problem?

Training Code

```
latent_dim = 185
embedding_dim=128
encoder_inputs = Input(shape=(int(art_max_length),))
#embedding layer
enc_emb=Embedding(input_vocab_size+1,embedding_dim, weights=[x_emb_matrix_reduce],trainable=False)(encoder_inputs)
#encoder lstm 1
encoder_bi_lstm1 = Bidirectional(LSTM(latent_dim,
return_sequences=True,
return_state=True,
dropout=0.4,
recurrent_dropout=0.4),
merge_mode="concat")
encoder_output1, forward_state_h1, forward_state_c1, backward_state_h1, backward_state_c1 = encoder_bi_lstm1(enc_emb)
encoder_states1 = [forward_state_h1, forward_state_c1, backward_state_h1, backward_state_c1]
#encoder lstm 2
encoder_bi_lstm2 = Bidirectional(LSTM(latent_dim,
return_sequences=True,
return_state=True,
dropout=0.4,
recurrent_dropout=0.4),
merge_mode="concat")
encoder_output2, forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2 = encoder_bi_lstm2(encoder_output1)
encoder_states2 = [forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2]
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))
#embedding layer
dec_emb_layer = Embedding(output_vocab_size+1, embedding_dim, weights=[y_emb_matrix_reduce], trainable=False)
dec_emb = dec_emb_layer(decoder_inputs)
decoder_bi_lstm = Bidirectional(LSTM(latent_dim,
return_sequences=True,
return_state=True,
dropout=0.4,
recurrent_dropout=0.2),
merge_mode="concat")
decoder_outputs, decoder_fwd_state_h1, decoder_fwd_state_c1, decoder_back_state_h1, decoder_back_state_c1 = decoder_bi_lstm(dec_emb,initial_state=encoder_states2)
decoder_states = [decoder_fwd_state_h1, decoder_fwd_state_c1, decoder_back_state_h1, decoder_back_state_c1]
# Attention layer
attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_output2, decoder_outputs])
# Concat attention input and decoder LSTM output
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
epochs = 75
batch_size = 3
learning_rate = 0.001
initial_accumulator_value = 0.1
name = 'Adagrad'
clipnorm = 1.0
opt = Adagrad(learning_rate=learning_rate, initial_accumulator_value=initial_accumulator_value, name=name, clipnorm=clipnorm)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy')
es = EarlyStopping(monitor='val_loss', mode='auto', verbose=1,patience=10)
history=model.fit(x_tr, y_tr, epochs=epochs, callbacks=[es], steps_per_epoch=250, validation_steps=10, batch_size=batch_size, validation_data=(x_val,y_val))
```

Inference Code

```
reverse_target_word_index = y_tokenizer.index_word
reverse_source_word_index = x_tokenizer.index_word
target_word_index = y_tokenizer.word_index
# Encode the input sequence to get the feature vector
encoder_model = Model(inputs=encoder_inputs, outputs=[encoder_output2, forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2])
# Decoder setup
# Below tensors will hold the states of the previous time step
decoder_state_input_h_fwd = Input(shape=(latent_dim,))
decoder_state_input_h_bwd = Input(shape=(latent_dim,))
decoder_state_input_c_fwd = Input(shape=(latent_dim,))
decoder_state_input_c_bwd = Input(shape=(latent_dim,))
decoder_hidden_state_input = Input(shape=(art_max_length,latent_dim*2))
# Get the embeddings of the decoder sequence
dec_emb2= dec_emb_layer(decoder_inputs)
# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_outputs2, decoder_fwd_state_h2, decoder_fwd_state_c2, decoder_back_state_h2, decoder_back_state_c2 = decoder_bi_lstm(dec_emb2, initial_state=[decoder_state_input_h_fwd, decoder_state_input_h_bwd, decoder_state_input_c_fwd, decoder_state_input_c_bwd])
decoder_states2 = [decoder_fwd_state_h2, decoder_fwd_state_c2, decoder_back_state_h2, decoder_back_state_c2]
#attention inference
attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_input, decoder_outputs2])
decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_outputs2, attn_out_inf])
# A dense softmax layer to generate prob dist. over the target vocabulary
decoder_outputs2 = decoder_dense(decoder_inf_concat)
# Final decoder model
decoder_model = Model(
[decoder_inputs] + [decoder_hidden_state_input, decoder_state_input_h_fwd, decoder_state_input_h_bwd, decoder_state_input_c_fwd, decoder_state_input_c_bwd],
[decoder_outputs2] + decoder_states2)
```

Code to generate summary

```
def seq2summary(input_seq):
newString=''
for i in input_seq:
if((i[0]!=0) and (i[0]!=target_word_index['sostok']) and (i[0]!=target_word_index['eostok'])):
newString=newString+reverse_target_word_index[i[0]]+' '
return newString
def seq2text(input_seq):
newString=''
for i in input_seq:
if(i!=0):
newString=newString+reverse_source_word_index[i]+' '
return newString
def decode_sequence(input_seq):
e_out, e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd = encoder_model.predict(input_seq)
# Generate empty target sequence of length 1.
target_seq = np.zeros((1,1))
# Populate the first word of target sequence with the start word.
target_seq[0, 0] = target_word_index['sostok']
stop_condition = False
decoded_sentence = ''
while not stop_condition:
output_tokens, h_fwd, c_fwd, h_bwd, c_bwd = decoder_model.predict([target_seq] + [e_out, e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd])
return output_tokens[0, -1, :]
# Sample a token
sampled_token_index = np.argmax(output_tokens[0, -1, :])
sampled_token = reverse_target_word_index[sampled_token_index]
if(sampled_token!='eostok'):
decoded_sentence += ' '+sampled_token
# Exit condition: either hit max length or find stop word.
if (sampled_token == 'eostok' or len(decoded_sentence.split()) >= (high_max_length-1)):
stop_condition = True
# Update the target sequence (of length 1).
target_seq = np.zeros((1,1))
target_seq[0, 0] = sampled_token_index
# Update internal states
e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd = h_fwd, c_fwd, h_bwd, c_bwd
return decoded_sentence
```