#StackBounty: #tensorflow #keras #deep-learning #nlp #lstm Seq2Seq model return same vector for all sentences

Bounty: 50

I’m trying to build an abstractive text summarization model. I’m using word2vec embeddings, a 2-layer Bi-LSTM encoder, a 1-layer Bi-LSTM decoder, and an attention layer. After training, the model always returns the same output vector for every input sentence. How can I fix this problem?

Training Code

# Training graph: 2-layer Bi-LSTM encoder, 1-layer Bi-LSTM decoder, attention,
# and a softmax projection over the target vocabulary.
latent_dim = 185

encoder_inputs = Input(shape=(int(art_max_length),))

#embedding layer
enc_emb=Embedding(input_vocab_size+1,embedding_dim, weights=[x_emb_matrix_reduce],trainable=False)(encoder_inputs)

#encoder lstm 1
# return_sequences=True: the full sequence is consumed by the next layer.
# return_state=True: required for the (output + 4 states) unpacking below.
# NOTE(review): any dropout arguments the original call carried are not
# visible here and have not been reinstated.
encoder_bi_lstm1 = Bidirectional(LSTM(latent_dim, return_sequences=True, return_state=True))
encoder_output1, forward_state_h1, forward_state_c1, backward_state_h1, backward_state_c1 = encoder_bi_lstm1(enc_emb)
encoder_states1 = [forward_state_h1, forward_state_c1, backward_state_h1, backward_state_c1]

#encoder lstm 2
encoder_bi_lstm2 = Bidirectional(LSTM(latent_dim, return_sequences=True, return_state=True))
encoder_output2, forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2 = encoder_bi_lstm2(encoder_output1)
encoder_states2 = [forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2]

# Set up the decoder, using `encoder_states2` as initial state.
decoder_inputs = Input(shape=(None,))

#embedding layer
dec_emb_layer = Embedding(output_vocab_size+1, embedding_dim, weights=[y_emb_matrix_reduce], trainable=False)
dec_emb = dec_emb_layer(decoder_inputs)

# NOTE(review): a bidirectional decoder reads the target sequence in both
# directions during training, while inference feeds it one token per step.
# Consider a unidirectional decoder if predictions collapse at inference.
decoder_bi_lstm = Bidirectional(LSTM(latent_dim, return_sequences=True, return_state=True))
decoder_outputs, decoder_fwd_state_h1, decoder_fwd_state_c1, decoder_back_state_h1, decoder_back_state_c1 = decoder_bi_lstm(dec_emb,initial_state=encoder_states2)
decoder_states = [decoder_fwd_state_h1, decoder_fwd_state_c1, decoder_back_state_h1, decoder_back_state_c1]

# Attention layer
attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_output2, decoder_outputs])

# Concat attention input and decoder LSTM output
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])

# Softmax projection over the target vocabulary. Dense acts on the last axis,
# so it is applied independently at every decoder time step. The width matches
# the decoder embedding's input_dim so every token id has a class.
decoder_dense = Dense(output_vocab_size+1, activation='softmax')
decoder_outputs = decoder_dense(decoder_concat_input)

# The model must emit the per-step distribution, not the raw LSTM features,
# for sparse_categorical_crossentropy to be meaningful.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

epochs = 75
batch_size = 3
learning_rate = 0.001
initial_accumulator_value = 0.1
name = 'Adagrad'
clipnorm = 1.0

opt = Adagrad(learning_rate=learning_rate, initial_accumulator_value=initial_accumulator_value, name=name, clipnorm=clipnorm)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy')
es = EarlyStopping(monitor='val_loss', mode='auto', verbose=1,patience=10)
# NOTE(review): the model has two inputs, so x_tr / x_val are presumably
# [encoder_input, decoder_input] pairs and y_tr / y_val the shifted targets.
# Confirm against the data preparation code.
history=model.fit(x_tr, y_tr, epochs=epochs, callbacks=[es], steps_per_epoch=250, validation_steps=10, batch_size=batch_size, validation_data=(x_val,y_val))

Inference Code

# Lookup tables used when turning predicted ids back into words.
reverse_target_word_index = y_tokenizer.index_word
reverse_source_word_index = x_tokenizer.index_word
target_word_index = y_tokenizer.word_index

# Encode the input sequence to get the feature vector
# Outputs: encoder sequence output followed by [fwd_h, fwd_c, bwd_h, bwd_c].
encoder_model = Model(inputs=encoder_inputs, outputs=[encoder_output2, forward_state_h2, forward_state_c2, backward_state_h2, backward_state_c2])

# Decoder setup
# Below tensors will hold the states of the previous time step.
# Bidirectional splits `initial_state` in half: the first half goes to the
# forward LSTM, the second to the backward LSTM, each as [h, c]. The
# placeholders are therefore declared and passed in that same order, which is
# also the positional order decode_sequence() feeds them in.
decoder_state_input_h_fwd = Input(shape=(latent_dim,))
decoder_state_input_c_fwd = Input(shape=(latent_dim,))

decoder_state_input_h_bwd = Input(shape=(latent_dim,))
decoder_state_input_c_bwd = Input(shape=(latent_dim,))

decoder_state_inputs = [
    decoder_state_input_h_fwd,
    decoder_state_input_c_fwd,
    decoder_state_input_h_bwd,
    decoder_state_input_c_bwd,
]

# Encoder sequence output; last axis is 2 * latent_dim (fwd + bwd concatenated).
decoder_hidden_state_input = Input(shape=(art_max_length,latent_dim*2))

# Get the embeddings of the decoder sequence
dec_emb2= dec_emb_layer(decoder_inputs)

# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_outputs2, decoder_fwd_state_h2, decoder_fwd_state_c2, decoder_back_state_h2, decoder_back_state_c2 = decoder_bi_lstm(dec_emb2, initial_state=decoder_state_inputs)
decoder_states2 = [decoder_fwd_state_h2, decoder_fwd_state_c2, decoder_back_state_h2, decoder_back_state_c2]

#attention inference
attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_input, decoder_outputs2])
decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

# A dense softmax layer to generate prob dist. over the target vocabulary
# `decoder_dense` is expected to be the trained output projection that belongs
# to the training graph.
decoder_outputs2 = decoder_dense(decoder_inf_concat)

# Final decoder model
# Inputs:  [token ids, encoder sequence output, fwd_h, fwd_c, bwd_h, bwd_c]
# Outputs: [token distribution, fwd_h, fwd_c, bwd_h, bwd_c]
decoder_model = Model(
    [decoder_inputs, decoder_hidden_state_input] + decoder_state_inputs,
    [decoder_outputs2] + decoder_states2)

Code to generate summary

def seq2summary(input_seq):
    """Turn a sequence of target-token ids into a space-separated summary.

    Each element of ``input_seq`` is indexed with ``[0]`` to obtain the token
    id. Padding (0) and the ``sostok`` / ``eostok`` markers are skipped.
    Every emitted word is followed by a single space.
    """
    skipped_ids = (0, target_word_index['sostok'], target_word_index['eostok'])
    # Start from an empty string so an empty or all-padding input returns ''
    # instead of raising UnboundLocalError.
    newString = ''
    for i in input_seq:
        if i[0] not in skipped_ids:
            newString = newString + reverse_target_word_index[i[0]] + ' '
    return newString

def seq2text(input_seq):
    """Turn a sequence of source-token ids into a space-separated string.

    Every emitted word is followed by a single space. Id 0 is skipped, in line
    with ``seq2summary``.
    NOTE(review): presumably 0 is the padding id and has no entry in
    ``reverse_source_word_index`` -- confirm against the tokenizer.
    """
    newString = ''
    for i in input_seq:
        if i != 0:
            newString = newString + reverse_source_word_index[i] + ' '
    return newString

def decode_sequence(input_seq):
    """Greedily decode one encoded input sequence into a summary string.

    The encoder is run once. The decoder is then called one token at a time,
    starting from ``sostok``, and each step's states seed the next step.
    Decoding stops when ``eostok`` is produced or when the sentence reaches
    ``high_max_length - 1`` words. ``eostok`` itself is not added to the output.
    """
    e_out, e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd = encoder_model.predict(input_seq)
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1,1))

    # Populate the first word of target sequence with the start word.
    target_seq[0, 0] = target_word_index['sostok']

    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        # States are passed positionally as [fwd_h, fwd_c, bwd_h, bwd_c].
        output_tokens, h_fwd, c_fwd, h_bwd, c_bwd = decoder_model.predict([target_seq] + [e_out, e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd])

        # Sample a token (greedy: most probable id at the last time step).
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_token = reverse_target_word_index[sampled_token_index]

        if sampled_token != 'eostok':
            decoded_sentence += ' '+sampled_token

        # Exit condition: either hit max length or find stop word.
        if (sampled_token == 'eostok'  or len(decoded_sentence.split()) >= (high_max_length-1)):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index

        # Update internal states
        e_h_fwd, e_c_fwd, e_h_bwd, e_c_bwd = h_fwd, c_fwd, h_bwd, c_bwd
    return decoded_sentence

Get this bounty!!!

#StackBounty: #deep-learning #nlp #svm #text-classification #language-model Best approach for text classification of phrases with littl…

Bounty: 50

So I have the task of classifying sentences based on their level of ‘change talk’ shown. Change talk is a psychology term used in counseling sessions to express how much the client wants to change their behavior.

So let’s say there are two classes: change talk; and non-change talk.

An example of change talk is: "I have to do this." or "I can achieve this."

An example of non-change talk is "I can’t do this." or "I have no motivation."

My issue is, if I want to take a machine learning approach in classifying these sentences, which is the best approach? SVM’s? I do not have a lot of training data. Also – all the tutorials I look at use sentences with obvious words that can easily be classified (e.g. "The baseball game is on tomorrow." -> SPORT, or "Donald Trump will make a TV announcement tomorrow." -> POLITICS).

I feel my data is harder to classify as it typically does not have keywords relating to each class.

Some guidance on how people would approach this task would be great.

Get this bounty!!!

#StackBounty: #deep-learning #classification #keras #convolutional-neural-network #ai Convolutional Neural Network for Signal Modulatio…

Bounty: 100

I recently posted another question and this question is the evolution of that one.

I will nevertheless restate the whole problem below, as if the previous question had never existed.

Problem description

I’m doing Signal Modulation Classification using a Convolutional Neural Network and I want to improve performance.


The dataset is composed of 220,000 rows like these. The data is perfectly balanced: I have 20,000 data points for each label.

Dataset column Type Range Form Notes
Signal i=real, q=real [i_0, i_1, …, i_n], [q_0, q_1, …, q_n] n=127
SNR s=integer [-18, 20] s
Label l=string l They are 11 labels

The lower the SNR value, the noisier the signal: classifying low-SNR signals is not that easy.

Neural Network

Neural Network is a Convolutional Neural Network coded as below:


# Functional-API CNN: two conv/pool stages followed by a dense classifier head
# ending in an 11-way softmax.
# `in_shp` is concatenated with [1] below, so it must be a list; the Reshape
# appends a trailing axis of size 1 to the input.
iq_in = keras.Input(shape=in_shp, name="IQ")
reshape = Reshape(in_shp + [1])(iq_in)
batch_normalization = BatchNormalization()(reshape)

# Stage 1: 16 filters, kernel size 4.
conv_1 = Convolution2D(16, 4, padding="same", activation="relu")(batch_normalization)
max_pool = MaxPooling2D(padding='same')(conv_1)
batch_normalization_2 = BatchNormalization()(max_pool)
# NOTE(review): this Dense sits between the two conv stages and receives the
# un-flattened feature map -- confirm this placement is intentional.
fc1 = Dense(256, activation="relu")(batch_normalization_2)
# Stage 2: 32 filters, kernel size 2.
conv_2 = Convolution2D(32, 2, padding="same", activation="relu")(fc1)
batch_normalization_3 = BatchNormalization()(conv_2)
max_pool_2 = MaxPooling2D(padding='same')(batch_normalization_3)

# Classifier head: flatten, dropout, two hidden Dense layers, softmax output.
out_flatten = Flatten()(max_pool_2)
dr = Dropout(DROPOUT_RATE)(out_flatten)
fc2 = Dense(256, activation="relu")(dr)
batch_normalization_4 = BatchNormalization()(fc2)
fc3 = Dense(128, activation="relu")(batch_normalization_4)
output = Dense(11, name="output", activation="softmax")(fc3)

# No `metrics` are passed to compile(), so only the loss is tracked here.
model = keras.Model(inputs=[iq_in], outputs=[output])
model.compile(loss='categorical_crossentropy', optimizer='adam')




Training is being done splitting the data in 75% as Training set, 25% as Test set.

NB_EPOCH = 100     # number of epochs to train on
BATCH_SIZE = 1024  # training batch size


# Train with checkpointing of the best model (lowest val_loss) and early
# stopping after 5 epochs without improvement.
# NOTE(review): X_train / Y_train are assumed to be the 75% training split
# that pairs with X_test / Y_test -- confirm the names against the split code.
history = model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    validation_data=(X_test, Y_test),
    callbacks = [
        keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True, mode='auto'),
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='auto')
    ],
)

# we re-load the best weights once training is finished
model.load_weights(filepath)



My evaluation system evaluate how accurate is my Neural Network for classifying signals with different SNR.


What did I try?

This is a list of things that I tried and that I’m sure make performance worse:

  • Reducing batch size (only increases training time without improving test accuracy)
  • Training without too noisy signals (lowers accuracy)
  • Moving the Dropout layer before the Flatten layer


Any suggestion to get better performances?

Thanks in advance!

Get this bounty!!!

#StackBounty: #python #machine-learning #deep-learning Remove white borders from segmented images

Bounty: 50

I’m new to image processing and am trying to segment lung CT images using Kmeans by using code below:

def process_mask(mask):
    """Return a dilated, per-slice convex-hull version of a 3-D binary mask.

    For every slice along axis 0 the convex hull replaces the slice, unless
    the hull is more than twice the slice's area, in which case the original
    slice is kept. Empty slices are passed through unchanged. The stacked
    result is then dilated 10 times with a 3-D connectivity-1 structuring
    element.
    """
    convex_mask = np.copy(mask)
    for i_layer in range(convex_mask.shape[0]):
        mask1 = np.ascontiguousarray(mask[i_layer])
        if np.sum(mask1) > 0:
            mask2 = convex_hull_image(mask1)
            # A hull much larger than the mask would swallow non-lung tissue;
            # fall back to the raw slice in that case.
            if np.sum(mask2) > 2 * np.sum(mask1):
                mask2 = mask1
        else:
            # Empty slice: nothing to hull. This branch also guarantees that
            # mask2 is always bound before the assignment below.
            mask2 = mask1
        convex_mask[i_layer] = mask2
    struct = generate_binary_structure(3, 1)
    dilatedMask = binary_dilation(convex_mask, structure=struct, iterations=10)

    return dilatedMask

def lumTrans(img):
    """Map intensities from the window [-1200, 600] linearly onto uint8 0..255.

    Values below the window map to 0 and values above it to 255. Without the
    clamp, out-of-window values would wrap around in the uint8 cast.
    """
    lungwin = np.array([-1200.,600.])
    newimg = (np.asarray(img)-lungwin[0])/(lungwin[1]-lungwin[0])
    # Clamp to [0, 1] before scaling so the cast below cannot overflow.
    newimg = np.clip(newimg, 0.0, 1.0)
    newimg = (newimg*255).astype('uint8')
    return newimg

def lungSeg(imgs_to_process,output,name):
    """Segment the lungs in one image file and save the masked image as PNG.

    Parameters:
        imgs_to_process: path (or file object) accepted by ``Image.open``.
        output: destination directory; created if it does not exist.
        name: base name of the output file; ``<name>.png`` is written.

    The image is windowed with ``lumTrans``, thresholded with 2-cluster
    k-means on a central window, cleaned by erosion/dilation, filtered by
    region bounding box, then refined with ``process_mask``. Pixels outside
    the final mask are set to 0 in a copy of the input, which is saved.

    NOTE(review): the per-slice code unpacks ``img.shape`` into two values, so
    the input is presumably a single-channel image -- confirm for RGB inputs.
    NOTE(review): nothing in this function writes ``<name>_clean.npy``, so
    confirm the early-exit check below is still wanted.
    """
    if os.path.exists(output+'/'+name+'_clean.npy') : return
    imgs_to_process = Image.open(imgs_to_process)
    img_to_save = imgs_to_process.copy()
    img_to_save = np.asarray(img_to_save).astype('uint8')

    imgs_to_process = lumTrans(imgs_to_process)
    # Add a leading axis so the 2-D image is handled as a 1-slice volume.
    imgs_to_process = np.expand_dims(imgs_to_process, axis=0)
    x,y,z = imgs_to_process.shape
    img_array = imgs_to_process.copy()
    # Pixel limits expressed relative to a 512-pixel reference size.
    A1 = int(y/(512./100))
    A2 = int(y/(512./400))

    A3 = int(y/(512./475))
    A4 = int(y/(512./40))
    A5 = int(y/(512./470))
    for i in range(len(imgs_to_process)):
        img = imgs_to_process[i]
        x,y = img.shape
        #Standardize the pixel values
        allmean = np.mean(img)
        allstd = np.std(img)
        img = img-allmean
        img = img/allstd
        # Cluster the central window into two groups; the midpoint of the two
        # centres is used as the threshold for the whole slice.
        middle = img[A1:A2,A1:A2]
        kmeans = KMeans(n_clusters=2).fit(np.reshape(middle,[np.prod(middle.shape),1]))
        centers = sorted(kmeans.cluster_centers_.flatten())
        threshold = np.mean(centers)
        thresh_img = np.where(img<threshold,1.0,0.0)  # threshold the image
        eroded = morphology.erosion(thresh_img,np.ones([4,4]))
        dilation = morphology.dilation(eroded,np.ones([10,10]))
        labels = measure.label(dilation)
        regions = measure.regionprops(labels)
        good_labels = []
        for prop in regions:
            B = prop.bbox
            # Keep regions whose bounding box is smaller than A3 in both
            # directions and lies between rows A4 and A5.
            if B[2]-B[0]<A3 and B[3]-B[1]<A3 and B[0]>A4 and B[2]<A5:
                good_labels.append(prop.label)
        mask = np.zeros([x,y],dtype=np.int8)
        for N in good_labels:
            mask = mask + np.where(labels==N,1,0)
        mask = morphology.dilation(mask,np.ones([10,10])) # one last dilation
        imgs_to_process[i] = mask

    m1 = imgs_to_process
    dm1 = process_mask(m1)
    dilatedMask = dm1
    Mask = m1
    # Ring between the dilated mask and the raw mask.
    extramask = dilatedMask ^ Mask
    bone_thresh = 180
    pad_value = 0

    sliceim = img_array
    sliceim = sliceim*dilatedMask+pad_value*(1-dilatedMask).astype('uint8')
    # Bright pixels inside the ring are blanked.
    bones = sliceim*extramask>bone_thresh
    sliceim[bones] = pad_value

    if not os.path.exists(output):
        os.makedirs(output)
    img_to_save[sliceim.squeeze()==0] = 0
    im = Image.fromarray(img_to_save)

    # os.path.join keeps the path valid whether or not `output` ends with a
    # separator.
    im.save(os.path.join(output, name + '.png'), 'PNG')

The problem is that the segmented lung still contains white borders like this:

segmented lung (output):

segmented lung

unsegmented lung (input):

unsegmented lung

The full code can be found in Google Colab Notebook. code.

And sample of the dataset is here.

Get this bounty!!!

#StackBounty: #python #image #opencv #deep-learning #computer-vision How to improve the binarization of Text document ( Fill missing pi…

Bounty: 100

What can be done, and how (e.g. where exactly to place erosion, dilation, opening, closing, etc.), so that the words are not cut or invisible in between? Maybe some other binarization technique that works in most cases, or some parameter tuning?

I have this code to binarize / Threshold the image. It works fine on a wide range of images except a couple of things and the major problem being that it leads to loss of info due to some brightness and other factors. Some of the words are not readable or gets broken. Below is the code to threshold / Binarize along with some of the images. Also, here is the link to 200 sample and resulting images

import cv2
import numpy as np
import skimage.filters as filters

def convert(path):
    """Binarize the image at ``path`` and return the thresholded uint8 array.

    Pipeline: grayscale -> divide by a heavily blurred copy to flatten uneven
    illumination -> unsharp mask -> Otsu threshold.
    """
    img = cv2.imread(path)
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

    # A very large kernel estimates the background illumination.
    smooth = cv2.GaussianBlur(gray, (103,103), 0)

    division = cv2.divide(gray, smooth, scale=255)

    # The input is single-channel, which is unsharp_mask's default, so the
    # deprecated `multichannel` keyword is simply omitted.
    # A higher radius thickens strokes by involving more surrounding pixels.
    sharp = filters.unsharp_mask(division, radius=1.5, amount=5, preserve_range=False)
    sharp = (255*sharp).clip(0,255).astype(np.uint8)

    thresh = cv2.threshold(sharp, 0, 255, cv2.THRESH_OTSU )[1]
    return thresh

enter image description here
enter image description here

Get this bounty!!!

#StackBounty: #machine-learning #python #deep-learning #clustering #data-mining How to cluster skills in job domain?

Bounty: 100

I have a problem related to clustering, where i need to cluster skill set from job domain.

Let’s say that in a resume a candidate mentions their familiarity with Amazon S3 buckets. But each person can phrase it differently. For example,

  1. amazon s3
  2. s3
  3. aws s3

A human can easily understand that these three are exactly equivalent. I can’t use k-means-style clustering because it can fail in a lot of cases.

For example,

  1. spring
  2. spring framework
  3. Spring MVC
  4. Spring Boot

These may fall in the same cluster, which is wrong. A candidate who knows the Spring framework might not know Spring Boot, etc.

Similarity of word based on embeddings/bow model fail here.

What are the options I have? Currently I manually collected a lot of word variations in a dict format, key is root word value is array of variations of that root word.

Any help is really appreciated?

Get this bounty!!!

#StackBounty: #machine-learning #deep-learning #keras #r #convolutional-neural-network Calculate importance of input data bands for CNN…

Bounty: 50

I constructed and trained a convolutional neural network using Keras in R with the TensorFlow backend. I feed the network with multispectral images for a simple image classification.

Is there some way to calculate which of the input bands were most important for the classification task? Ideally, I would like to have a plot with some measure of importance, grouped by bands and image classes.

How can I obtain this information? Would it be necessary / possible to calculate saliency maps for every band and picture, and take the mean or sum of these images per class and band?

Are there also other ways to get the information, which band was most important for the classification of an image?

Edit: With saliency maps I mean these or these visualizations. They provide information on which part of the image led the CNN to the conclusion to which class it identifies. However, I always see only one saliency map for the whole image. Is it possible to make one for each input band of an image? For example if I input RGB data, one for each color channel?

(This is inspired by a visualization of this paper. I saw it but I don’t know if it’s valid to do and if yes, how to do.)

Get this bounty!!!

#StackBounty: #deep-learning #neural-network #convolutional-neural-network #autoencoder Autoencoder not learning walk forward image tra…

Bounty: 50

I have a series of 15 frames with (60 rows x 50 columns). Over the course of those 15 frames, the moon moves from the top left to the bottom right.

Data = https://github.com/aiqc/AIQC/tree/main/remote_datum/image/liberty_moon

enter image description here

enter image description here

enter image description here

As my input data I have a 60×50 image. As my evaluation label I have a 60×50 image from 2 frames later. All are divided by 255.

I am attempting an autoencoder.

    # Sequential Conv1D stack; `hp['multiplier']` scales the filter count of
    # every hidden convolution.
    # NOTE(review): the input layout is not visible here -- presumably each
    # 60x50 frame is fed as (steps=60, channels=50); confirm with the caller.
    model = keras.models.Sequential()
    # Down path: three Conv1D + MaxPool1D(pool size 2) pairs.
    model.add(layers.Conv1D(64*hp['multiplier'], 3, activation='relu', padding='same'))
    model.add(layers.MaxPool1D( 2, padding='same'))
    model.add(layers.Conv1D(32*hp['multiplier'], 3, activation='relu', padding='same'))
    model.add(layers.MaxPool1D( 2, padding='same'))
    model.add(layers.Conv1D(16*hp['multiplier'], 3, activation='relu', padding='same'))
    model.add(layers.MaxPool1D( 2, padding='same'))

    # Up path: filter counts mirror the down path.
    # NOTE(review): no upsampling / transposed-conv layer appears here, so the
    # pooled length is never restored -- confirm the output shape matches the
    # 60x50 label.
    model.add(layers.Conv1D(16*hp['multiplier'], 3, activation='relu', padding='same'))
    model.add(layers.Conv1D(32*hp['multiplier'], 3, activation='relu', padding='same'))
    # NOTE(review): this layer omits padding='same', unlike its neighbours --
    # confirm that is intentional.
    model.add(layers.Conv1D(64*hp['multiplier'], 3, activation='relu'))

    # Output layer: 50 channels with sigmoid activation.
    model.add(layers.Conv1D(50, 3, activation='sigmoid', padding='same'))
    # last layer tried sigmoid with BCE loss.
    # last layer tried relu with MAE.

Tutorials say to use a final layer of sigmoid and BCE loss, but the values I’m producing must not be between 0-1 because the loss goes way negative.

enter image description here

If I use a final layer of relu with MAE loss it claims to learn something.

enter image description here

But the predicted image is notttt great:

enter image description here

Get this bounty!!!

#StackBounty: #deep-learning #image-classification #convolutional-neural-network #distributed #inference Distributed inference for imag…

Bounty: 50

I would like to take the output of an intermediate layer of a CNN (layer G) and feed it to an intermediate layer of a wider CNN (layer H) to complete the inference.

Challenge: The two layers G, H have different dimensions and thus it can’t be done directly.
Solution: Use a third CNN (call it r) which will take as input the output of layer G and output a valid input for layer H.
Then both the weights of layer G and r will be tuned using the loss function:

$$L(W_G, W_r) = \mathrm{MSE}(\text{output of layer H}, \text{output of r})$$

My question: Will this method only change the layer G’s weights along with r’s weights? Does the whole system require finetuning afterwards to update the weights of the other layers?

Get this bounty!!!

#StackBounty: #python #deep-learning #pytorch #torch #knowledge-graph Joint training of two embedding models (KGE + GloVe)

Bounty: 500

How do I create a joint model in PyTorch that shares the parameters of a Knowledge Graph Embedding (KGE) model, TuckER (given below), and GloVe (assume a co-occurrence matrix along with the dimensions is already available)?

In other words, the joint model must obey the criterion of the CMTF (Coupled Matrix and Tensor Factorizations) Framework and the weights from the two embeddings must be tied during training. The problem here is that the KGE expects a triple (subject, relation, object) whereas the GloVe expects a co-occurrence matrix. Additionally, their loss functions are also computed differently.

class TuckER(torch.nn.Module):
    """TuckER scoring model: entity/relation embeddings plus a core tensor.

    Parameters:
        d: object exposing ``entities`` and ``relations`` collections; their
            lengths set the embedding table sizes.
        d1: entity embedding dimension.
        d2: relation embedding dimension.
        **kwargs: must contain ``input_dropout``, ``hidden_dropout1`` and
            ``hidden_dropout2`` (dropout rates). May contain ``device`` for
            the core tensor, which defaults to ``"cuda"``.
    """

    def __init__(self, d, d1, d2, **kwargs):
        super(TuckER, self).__init__()

        self.E = torch.nn.Embedding(len(d.entities), d1)
        self.R = torch.nn.Embedding(len(d.relations), d2)
        # Core tensor W of shape (d2, d1, d1), initialised uniformly in [-1, 1).
        # The device is overridable so the model can be built on CPU-only hosts.
        self.W = torch.nn.Parameter(torch.tensor(np.random.uniform(-1, 1, (d2, d1, d1)),
                                    dtype=torch.float, device=kwargs.get("device", "cuda"), requires_grad=True))

        self.input_dropout = torch.nn.Dropout(kwargs["input_dropout"])
        self.hidden_dropout1 = torch.nn.Dropout(kwargs["hidden_dropout1"])
        self.hidden_dropout2 = torch.nn.Dropout(kwargs["hidden_dropout2"])
        self.loss = torch.nn.BCELoss()

        self.bn0 = torch.nn.BatchNorm1d(d1)
        self.bn1 = torch.nn.BatchNorm1d(d1)

    def init(self):
        """Re-initialise both embedding tables with Xavier-normal weights."""
        torch.nn.init.xavier_normal_(self.E.weight.data)
        torch.nn.init.xavier_normal_(self.R.weight.data)

    def forward(self, e1_idx, r_idx):
        """Score every entity as the object of each (subject, relation) pair.

        Parameters:
            e1_idx: 1-D tensor of subject entity indices.
            r_idx: 1-D tensor of relation indices, same length as ``e1_idx``.

        Returns:
            Tensor of shape (batch, number of entities) with sigmoid scores.
        """
        e1 = self.E(e1_idx)
        x = self.bn0(e1)
        x = self.input_dropout(x)
        x = x.view(-1, 1, e1.size(1))

        r = self.R(r_idx)
        # Contract the relation embedding with W to get one (d1, d1) matrix
        # per example.
        W_mat = torch.mm(r, self.W.view(r.size(1), -1))
        W_mat = W_mat.view(-1, e1.size(1), e1.size(1))
        W_mat = self.hidden_dropout1(W_mat)

        x = torch.bmm(x, W_mat)
        x = x.view(-1, e1.size(1))
        x = self.bn1(x)
        x = self.hidden_dropout2(x)
        # Dot product with every entity embedding gives one logit per entity.
        x = torch.mm(x, self.E.weight.transpose(1,0))
        pred = torch.sigmoid(x)
        return pred

I know how to jointly train two pre-trained models by loading the state dicts, taking an instance, running them on the two models, and then applying a feedforward layer on top. But I seem to be not able to figure this scenario out. Can you please suggest how I can achieve this?

Important Resources:

  1. Code for TuckER – https://github.com/ibalazevic/TuckER

Get this bounty!!!