RNN use mean square error does not converge
I am learning RNN through https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767. I change the loss function to mean square error and found it does not converge. The output is stuck at 0.5. Somehow, I feel the mistake is inside
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
But I don't how. I am not familiar with datatype. This may be a silly question. In case I don't make myself clear, the full code is below:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 1
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length
def generateData():
x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
y = np.roll(x, echo_step)
y[0:echo_step] = 0
x = x.reshape((batch_size, -1)) # The first index changing slowest, subseries as rows
y = y.reshape((batch_size, -1))
return (x, y)
tf.reset_default_graph()
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
init_state = tf.placeholder(tf.float32, [batch_size, state_size])
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)
# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)
# Forward pass
current_state = init_state
states_series =
for current_input in inputs_series:
current_input = tf.reshape(current_input, [batch_size, 1])
input_and_state_concatenated = tf.concat([current_input, current_state],axis=1) # Increasing number of columns
next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b) # Broadcasted addition
states_series.append(next_state)
current_state = next_state
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
#Loss function HERE
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
losses = tf.square(midlosses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
loss_list =
for epoch_idx in range(num_epochs):
x,y = generateData()
_current_state = np.zeros((batch_size, state_size))
print("New data, epoch", epoch_idx)
for batch_idx in range(num_batches):
start_idx = batch_idx * truncated_backprop_length
end_idx = start_idx + truncated_backprop_length
batchX = x[:,start_idx:end_idx]
batchY = y[:,start_idx:end_idx]
_total_loss, _train_step, _current_state,_logits_series,_midlosses = sess.run(
[total_loss, train_step, current_state,logits_series,midlosses],
feed_dict={
batchX_placeholder:batchX,
batchY_placeholder:batchY,
init_state:_current_state
})
loss_list.append(_total_loss)
if batch_idx%100 == 0:
print("Step",batch_idx, "Loss", _total_loss)
tensorflow rnn loss
add a comment |
I am learning RNN through https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767. I change the loss function to mean square error and found it does not converge. The output is stuck at 0.5. Somehow, I feel the mistake is inside
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
But I don't how. I am not familiar with datatype. This may be a silly question. In case I don't make myself clear, the full code is below:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 1
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length
def generateData():
x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
y = np.roll(x, echo_step)
y[0:echo_step] = 0
x = x.reshape((batch_size, -1)) # The first index changing slowest, subseries as rows
y = y.reshape((batch_size, -1))
return (x, y)
tf.reset_default_graph()
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
init_state = tf.placeholder(tf.float32, [batch_size, state_size])
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)
# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)
# Forward pass
current_state = init_state
states_series =
for current_input in inputs_series:
current_input = tf.reshape(current_input, [batch_size, 1])
input_and_state_concatenated = tf.concat([current_input, current_state],axis=1) # Increasing number of columns
next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b) # Broadcasted addition
states_series.append(next_state)
current_state = next_state
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
#Loss function HERE
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
losses = tf.square(midlosses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
loss_list =
for epoch_idx in range(num_epochs):
x,y = generateData()
_current_state = np.zeros((batch_size, state_size))
print("New data, epoch", epoch_idx)
for batch_idx in range(num_batches):
start_idx = batch_idx * truncated_backprop_length
end_idx = start_idx + truncated_backprop_length
batchX = x[:,start_idx:end_idx]
batchY = y[:,start_idx:end_idx]
_total_loss, _train_step, _current_state,_logits_series,_midlosses = sess.run(
[total_loss, train_step, current_state,logits_series,midlosses],
feed_dict={
batchX_placeholder:batchX,
batchY_placeholder:batchY,
init_state:_current_state
})
loss_list.append(_total_loss)
if batch_idx%100 == 0:
print("Step",batch_idx, "Loss", _total_loss)
tensorflow rnn loss
add a comment |
I am learning RNN through https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767. I change the loss function to mean square error and found it does not converge. The output is stuck at 0.5. Somehow, I feel the mistake is inside
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
But I don't how. I am not familiar with datatype. This may be a silly question. In case I don't make myself clear, the full code is below:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 1
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length
def generateData():
x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
y = np.roll(x, echo_step)
y[0:echo_step] = 0
x = x.reshape((batch_size, -1)) # The first index changing slowest, subseries as rows
y = y.reshape((batch_size, -1))
return (x, y)
tf.reset_default_graph()
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
init_state = tf.placeholder(tf.float32, [batch_size, state_size])
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)
# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)
# Forward pass
current_state = init_state
states_series =
for current_input in inputs_series:
current_input = tf.reshape(current_input, [batch_size, 1])
input_and_state_concatenated = tf.concat([current_input, current_state],axis=1) # Increasing number of columns
next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b) # Broadcasted addition
states_series.append(next_state)
current_state = next_state
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
#Loss function HERE
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
losses = tf.square(midlosses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
loss_list =
for epoch_idx in range(num_epochs):
x,y = generateData()
_current_state = np.zeros((batch_size, state_size))
print("New data, epoch", epoch_idx)
for batch_idx in range(num_batches):
start_idx = batch_idx * truncated_backprop_length
end_idx = start_idx + truncated_backprop_length
batchX = x[:,start_idx:end_idx]
batchY = y[:,start_idx:end_idx]
_total_loss, _train_step, _current_state,_logits_series,_midlosses = sess.run(
[total_loss, train_step, current_state,logits_series,midlosses],
feed_dict={
batchX_placeholder:batchX,
batchY_placeholder:batchY,
init_state:_current_state
})
loss_list.append(_total_loss)
if batch_idx%100 == 0:
print("Step",batch_idx, "Loss", _total_loss)
tensorflow rnn loss
I am learning RNN through https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767. I change the loss function to mean square error and found it does not converge. The output is stuck at 0.5. Somehow, I feel the mistake is inside
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
But I don't how. I am not familiar with datatype. This may be a silly question. In case I don't make myself clear, the full code is below:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 1
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length
def generateData():
x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
y = np.roll(x, echo_step)
y[0:echo_step] = 0
x = x.reshape((batch_size, -1)) # The first index changing slowest, subseries as rows
y = y.reshape((batch_size, -1))
return (x, y)
tf.reset_default_graph()
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
init_state = tf.placeholder(tf.float32, [batch_size, state_size])
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)
# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)
# Forward pass
current_state = init_state
states_series =
for current_input in inputs_series:
current_input = tf.reshape(current_input, [batch_size, 1])
input_and_state_concatenated = tf.concat([current_input, current_state],axis=1) # Increasing number of columns
next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b) # Broadcasted addition
states_series.append(next_state)
current_state = next_state
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
#Loss function HERE
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
losses = tf.square(midlosses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
loss_list =
for epoch_idx in range(num_epochs):
x,y = generateData()
_current_state = np.zeros((batch_size, state_size))
print("New data, epoch", epoch_idx)
for batch_idx in range(num_batches):
start_idx = batch_idx * truncated_backprop_length
end_idx = start_idx + truncated_backprop_length
batchX = x[:,start_idx:end_idx]
batchY = y[:,start_idx:end_idx]
_total_loss, _train_step, _current_state,_logits_series,_midlosses = sess.run(
[total_loss, train_step, current_state,logits_series,midlosses],
feed_dict={
batchX_placeholder:batchX,
batchY_placeholder:batchY,
init_state:_current_state
})
loss_list.append(_total_loss)
if batch_idx%100 == 0:
print("Step",batch_idx, "Loss", _total_loss)
tensorflow rnn loss
tensorflow rnn loss
asked Nov 12 at 7:28
John Xu
133
133
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
Just need to replace
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
by
logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series] #Broadcasted addition
Problem can solved.
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53257563%2frnn-use-mean-square-error-does-not-converge%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
Just need to replace
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
by
logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series] #Broadcasted addition
Problem can solved.
add a comment |
Just need to replace
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
by
logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series] #Broadcasted addition
Problem can solved.
add a comment |
Just need to replace
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
by
logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series] #Broadcasted addition
Problem can solved.
Just need to replace
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
by
logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series] #Broadcasted addition
Problem can solved.
answered Nov 13 at 20:30
John Xu
133
133
add a comment |
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53257563%2frnn-use-mean-square-error-does-not-converge%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown