RNN with mean squared error does not converge
I am learning RNNs through https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767. I changed the loss function to mean squared error and found that it does not converge: the output is stuck at 0.5. Somehow, I feel the mistake is inside



midlosses = [tf.squeeze(logits)-tf.squeeze(labels)  for logits, labels in zip(logits_series,labels_series)]


but I don't know why. I am not familiar with the data types involved, so this may be a silly question. In case I haven't made myself clear, the full code is below:



from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 1
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length

def generateData():
    x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0

    x = x.reshape((batch_size, -1))  # The first index changes slowest, subseries as rows
    y = y.reshape((batch_size, -1))

    return (x, y)

tf.reset_default_graph()
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])

init_state = tf.placeholder(tf.float32, [batch_size, state_size])

W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1, state_size)), dtype=tf.float32)

W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)

# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)

# Forward pass
current_state = init_state
states_series = []
for current_input in inputs_series:
    current_input = tf.reshape(current_input, [batch_size, 1])
    input_and_state_concatenated = tf.concat([current_input, current_state], axis=1)  # Increasing number of columns

    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
    states_series.append(next_state)
    current_state = next_state

logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
# Loss function HERE
midlosses = [tf.squeeze(logits) - tf.squeeze(labels) for logits, labels in zip(logits_series, labels_series)]
losses = tf.square(midlosses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    loss_list = []

    for epoch_idx in range(num_epochs):
        x, y = generateData()
        _current_state = np.zeros((batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:, start_idx:end_idx]
            batchY = y[:, start_idx:end_idx]

            _total_loss, _train_step, _current_state, _logits_series, _midlosses = sess.run(
                [total_loss, train_step, current_state, logits_series, midlosses],
                feed_dict={
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state
                })
            loss_list.append(_total_loss)
            if batch_idx % 100 == 0:
                print("Step", batch_idx, "Loss", _total_loss)
      tensorflow rnn loss
asked Nov 12 at 7:28 by John Xu
1 Answer
You just need to replace

logits_series = [tf.matmul(state, W2) + b2 for state in states_series]

with

logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series]  # Broadcasted addition

and the problem is solved.
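A note on why the shapes matter (an illustrative sketch, not part of the original answer): the targets are 0/1 with equal probability, so a constant output of 0.5 is exactly the best constant predictor under MSE, which is where a model can settle if the loss is computed over mis-shaped tensors. In particular, if a [batch_size, 1] tensor is ever subtracted from a [batch_size] tensor without squeezing, NumPy/TensorFlow broadcasting silently produces a [batch_size, batch_size] matrix instead of an elementwise difference:

import numpy as np

# Hypothetical stand-ins for one timestep's tensors:
# logits has shape (batch_size, 1), labels has shape (batch_size,).
batch_size = 5
logits = np.random.rand(batch_size, 1)
labels = np.random.rand(batch_size)

bad = logits - labels             # broadcasts to (5, 5): every logit minus every label
good = logits.squeeze() - labels  # (5,): the intended elementwise difference

print(bad.shape, good.shape)              # (5, 5) (5,)
print(np.mean(bad**2), np.mean(good**2))  # the two "MSE" values generally differ

Squeezing the logits as early as possible, as in the fix above, keeps every downstream tensor rank-1 and rules this failure mode out.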
answered Nov 13 at 20:30 by John Xu