Why is my PPO algorithm not learning a simple environment?

Why is my PPO algorithm not learning a simple environment? - machine-learning

I have coded a PPO algorithm from scratch and tried testing it on Pendulum-v2. For some reason, the algorithm doesn't learn anything. I wrote this code to understand the mechanics of how PPO works and therefore have kept everything simple. I also referred popular code bases to ensure that I don't deviate too much from the norm.
Here are my actor and critic networks -
class Actor(nn.Module):
def __init__(self, state_size, action_size):
super(Actor, self).__init__()
self.state_size = state_size
self.action_size = action_size
self.linear_relu_stack = nn.Sequential(
nn.Linear(state_size, 300),
nn.ReLU(),
nn.Linear(300, 128),
nn.ReLU(),
nn.Linear(128, 128),
nn.ReLU(),
nn.Linear(128, action_size),
nn.Softmax()
)
def forward(self,x):
x = self.linear_relu_stack(x)
return x
class Critic(nn.Module):
def __init__(self, state_size, action_size):
super(Critic, self).__init__()
self.state_size = state_size
self.action_size = action_size
self.linear_stack = nn.Sequential(
nn.Linear(state_size, 300),
nn.ReLU(),
nn.Linear(300, 128),
nn.ReLU(),
nn.Linear(128, 128),
nn.ReLU(),
nn.Linear(128, 1)
)
def forward(self, x):
x = self.linear_stack(x)
return x
Here is the rollout phase where we collect data -
def rollout():
for i in range(ppo_batch): # 100 episodes should be good?
print("Rollout process, i = ", i)
obs = torch.tensor(env.reset(), dtype=torch.float32).unsqueeze(0)
tot_rewards = 0
transitions = []
iter = 0
done = False
while not done:
act_probs = torch.distributions.Categorical(actor(obs.to(device)))
action = act_probs.sample()
action = action.cpu().detach().numpy()
next_state, reward, done, info = env.step(action)
action = torch.tensor(action, dtype=torch.float32).to(device)
tot_rewards += np.power(gamma, iter) * reward
iter += 1
transitions.append((obs, action, act_probs.log_prob(action), tot_rewards))
obs = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0)
print("Discounted Reward = ", tot_rewards)
batch_obs = torch.Tensor([s.numpy() for (s, a, a_p, r) in transitions]).to(device)
# print("batch_obs shape = ", np.array(batch_obs).shape)
batch_act = torch.Tensor([a for (s, a, a_p, r) in transitions]).to(device)
batch_log_probs = torch.Tensor([a_p for (s, a, a_p, r) in transitions]).to(device)
batch_rtgs = torch.Tensor([r for (s, a, a_p, r) in transitions]).flip(dims = (0,)).to(device)
return batch_obs, batch_act, batch_log_probs, batch_rtgs
This is where the learning happens -
for i in range(episodes):
batch_obs, batch_act, batch_log_probs, batch_rtgs = rollout()
value = critic(batch_obs)
batch_rtgs = batch_rtgs
# todo Why are we detaching value
A_k = batch_rtgs - value.squeeze().detach()
for _ in range(training_iters):
value = critic(batch_obs).squeeze()
act_probs = torch.distributions.Categorical(actor(batch_obs))
action = act_probs.sample()
log_probs = act_probs.log_prob(action).squeeze()
ratios = torch.exp(log_probs - batch_log_probs)
surr1 = ratios*A_k
surr2 = torch.clamp(ratios, 1 - clip, 1 + clip)*A_k
actor_loss = -torch.min(surr1, surr2).mean()
critic_loss = (value - batch_rtgs).pow(2).mean()
#todo No idea why we are doing retain_graph = True
policy_opt.zero_grad()
actor_loss.backward(retain_graph=True)
policy_opt.step()
value_opt.zero_grad()
critic_loss.backward(retain_graph=True)
value_opt.step()
What I've already done -
A major bug I found was that the actor and critic loss didn't have dimension 1, before taking the mean. Therefore the loss function was an n-dimensional matrix before being averaged which was obviously wrong. I believe I have fixed it now.
Here's the entire code -
#Modified this code - https://github.com/DeepReinforcementLearning/DeepReinforcementLearningInAction/blob/master/Chapter%204/Ch4_book.ipynb
#Also, modified this code - https://github.com/higgsfield/RL-Adventure-2/blob/master/1.actor-critic.ipynb
# Also, modified this code - https://github.com/ericyangyu/PPO-for-Beginners/blob/9abd435771aa84764d8d0d1f737fa39118b74019/ppo.py#L151
import numpy as np
import gym
import torch
from torch import nn
import matplotlib.pyplot as plt
env = gym.make('Pendulum-v1')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
learning_rate = 0.0001
episodes = 10000
gamma = 0.99
clip = 0.2
#No idea whether these hyperparameters are good
ppo_batch = 30
training_iters = 5
dim_action = env.action_space.shape[0]
class Actor(nn.Module):
def __init__(self, state_size, action_size):
super(Actor, self).__init__()
self.state_size = state_size
self.action_size = action_size
self.linear_relu_stack = nn.Sequential(
nn.Linear(state_size, 300),
nn.ReLU(),
nn.Linear(300, 128),
nn.ReLU(),
nn.Linear(128, 128),
nn.ReLU(),
nn.Linear(128, action_size),
nn.Softmax()
)
def forward(self,x):
x = self.linear_relu_stack(x)
return x
class Critic(nn.Module):
def __init__(self, state_size, action_size):
super(Critic, self).__init__()
self.state_size = state_size
self.action_size = action_size
self.linear_stack = nn.Sequential(
nn.Linear(state_size, 300),
nn.ReLU(),
nn.Linear(300, 128),
nn.ReLU(),
nn.Linear(128, 128),
nn.ReLU(),
nn.Linear(128, 1)
)
def forward(self, x):
x = self.linear_stack(x)
return x
def rollout():
for i in range(ppo_batch): # 100 episodes should be good?
print("Rollout process, i = ", i)
obs = torch.tensor(env.reset(), dtype=torch.float32).unsqueeze(0)
tot_rewards = 0
transitions = []
iter = 0
done = False
while not done:
act_probs = torch.distributions.Categorical(actor(obs.to(device)))
action = act_probs.sample()
action = action.cpu().detach().numpy()
next_state, reward, done, info = env.step(action)
action = torch.tensor(action, dtype=torch.float32).to(device)
tot_rewards += np.power(gamma, iter) * reward
iter += 1
transitions.append((obs, action, act_probs.log_prob(action), tot_rewards))
obs = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0)
print("Discounted Reward = ", tot_rewards)
batch_obs = torch.Tensor([s.numpy() for (s, a, a_p, r) in transitions]).to(device)
# print("batch_obs shape = ", np.array(batch_obs).shape)
batch_act = torch.Tensor([a for (s, a, a_p, r) in transitions]).to(device)
batch_log_probs = torch.Tensor([a_p for (s, a, a_p, r) in transitions]).to(device)
batch_rtgs = torch.Tensor([r for (s, a, a_p, r) in transitions]).flip(dims = (0,)).to(device)
return batch_obs, batch_act, batch_log_probs, batch_rtgs
actor = Actor(env.observation_space.shape[0], dim_action).to(device)
critic = Critic(env.observation_space.shape[0], dim_action).to(device)
policy_opt = torch.optim.Adam(params = actor.parameters(), lr = learning_rate)
value_opt = torch.optim.Adam(params = critic.parameters(), lr = learning_rate)
score = []
for i in range(episodes):
batch_obs, batch_act, batch_log_probs, batch_rtgs = rollout()
value = critic(batch_obs)
batch_rtgs = batch_rtgs
# todo Why are we detaching value
A_k = batch_rtgs - value.squeeze().detach()
for _ in range(training_iters):
value = critic(batch_obs).squeeze()
act_probs = torch.distributions.Categorical(actor(batch_obs))
action = act_probs.sample()
log_probs = act_probs.log_prob(action).squeeze()
ratios = torch.exp(log_probs - batch_log_probs)
surr1 = ratios*A_k
surr2 = torch.clamp(ratios, 1 - clip, 1 + clip)*A_k
actor_loss = -torch.min(surr1, surr2).mean()
critic_loss = (value - batch_rtgs).pow(2).mean()
#todo No idea why we are doing retain_graph = True
policy_opt.zero_grad()
actor_loss.backward(retain_graph=True)
policy_opt.step()
value_opt.zero_grad()
critic_loss.backward(retain_graph=True)
value_opt.step()

Related

Converting generative transformer model from keras to PyTorch

I would like to re-create the following keras model in PyTorch.
vocab_size = 22
maxlen = 200
embed_dim = 256
num_heads = 2
feed_forward_dim = 256
batch_size = 128
decoders = 5
def create_model():
inputs = layers.Input(shape=(maxlen,), dtype=tf.int32)
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
decoder_blocks = []
for i in range(decoders):
decoder_blocks.append(DecoderBlock(embed_dim, num_heads, feed_forward_dim))
for i in range(len(decoder_blocks)):
x = decoder_blocks[i](x)
outputs = layers.Dense(vocab_size)(x)
model = keras.Model(inputs=inputs, outputs=[outputs, x])
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
loss=[loss_fn, None],
)
return model
model = create_model()
Here are the Decoder and the TokenAndPositionEmbedding layers along with the Causal Attention Mask
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
i = tf.range(n_dest)[:, None]
j = tf.range(n_src)
m = i >= j - n_src + n_dest
mask = tf.cast(m, dtype)
mask = tf.reshape(mask, [1, n_dest, n_src])
mult = tf.concat(
[tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
)
return tf.tile(mask, mult)
class DecoderBlock(layers.Layer):
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
super(DecoderBlock, self).__init__()
self.att = layers.MultiHeadAttention(num_heads, embed_dim)
self.ffn = keras.Sequential(
[layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
)
self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
self.dropout1 = layers.Dropout(rate)
self.dropout2 = layers.Dropout(rate)
def call(self, inputs):
input_shape = tf.shape(inputs)
batch_size = input_shape[0]
seq_len = input_shape[1]
causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, tf.bool)
attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
attention_output = self.dropout1(attention_output)
out1 = self.layernorm1(inputs + attention_output)
ffn_output = self.ffn(out1)
ffn_output = self.dropout2(ffn_output)
return self.layernorm2(out1 + ffn_output)
class TokenAndPositionEmbedding(layers.Layer):
def __init__(self, maxlen, vocab_size, embed_dim):
super(TokenAndPositionEmbedding, self).__init__()
self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
def call(self, x):
maxlen = tf.shape(x)[-1]
positions = tf.range(start=0, limit=maxlen, delta=1)
positions = self.pos_emb(positions)
x = self.token_emb(x)
return x + positions
For reference, this code is copied directly from: https://keras.io/examples/generative/text_generation_with_miniature_gpt/
I have tried to create equivalent architecture in PyTorch using nn.TransformerDecoderLayer. Apologies for not including my own code, but I have been completely unsuccessful.

'Not callable' error when calculating integrated gradient interpretability with captum

I’m trying to perform model interpretability with captum but running into an error. Specifically, it says:
/usr/lib/python3.7/inspect.py in _signature_from_callable(obj, follow_wrapper_chains, skip_bound_arg, sigcls)
2206
2207 if not callable(obj):
-> 2208 raise TypeError('{!r} is not a callable object'.format(obj))
2209
2210 if isinstance(obj, types.MethodType):
I’m not certain how to resolve this. Here’s the definition of my model, for reference:
class dvib(nn.Module):
def __init__(self,k,out_channels, hidden_size):
super(dvib, self).__init__()
self.conv = torch.nn.Conv2d(in_channels=1,
out_channels = out_channels,
kernel_size = (1,20),
stride=(1,1),
padding=(0,0),
)
self.rnn = torch.nn.GRU(input_size = out_channels,
hidden_size = hidden_size,
num_layers = 2,
bidirectional = True,
batch_first = True,
dropout = 0.2
)
self.fc1 = nn.Linear(hidden_size*4, hidden_size*4)
self.enc_mean = nn.Linear(hidden_size*4+578,k)
self.enc_std = nn.Linear(hidden_size*4+578,k)
self.dec = nn.Linear(k, 2)
nn.init.xavier_uniform_(self.fc1.weight)
nn.init.constant_(self.fc1.bias, 0.0)
nn.init.xavier_uniform_(self.enc_mean.weight)
nn.init.constant_(self.enc_mean.bias, 0.0)
nn.init.xavier_uniform_(self.enc_std.weight)
nn.init.constant_(self.enc_std.bias, 0.0)
nn.init.xavier_uniform_(self.dec.weight)
nn.init.constant_(self.dec.bias, 0.0)
def cnn_gru(self,x,lens):
print(x.shape)
x = x.unsqueeze(1)
print('after first unsqueeze: ', x.shape)
x = self.conv(x)
print('after conv: ', x.shape)
x = torch.nn.ReLU()(x)
print('shape after relu: ', x.shape,type(x))
x = x.squeeze(3)
print('shape after squeeze: ', x.shape)
x = x.view(x.size(0),-1)
x = x.permute(0,2,1)
print('shape after permute: ', x.shape)
print(type(lens))
gru_input = pack_padded_sequence(x,lens,batch_first=True, enforce_sorted=False)
output, hidden = self.rnn(gru_input)
print('hidden layer: ', hidden.shape)
output_all = torch.cat([hidden[-1],hidden[-2],hidden[-3],hidden[-4]],dim=1)
print("output_all.shape:",output_all.shape)
return output_all
def forward(self, pssm, lengths, FEGS):
cnn_vectors = self.cnn_gru(pssm, lengths)
feature_vec = torch.cat([cnn_vectors, FEGS], dim = 1)
enc_mean, enc_std = self.enc_mean(feature_vec), f.softplus(self.enc_std(feature_vec)-5)
eps = torch.randn_like(enc_std)
latent = enc_mean + enc_std*eps
outputs = f.sigmoid(self.dec(latent))
print(outputs.shape)
return outputs, enc_mean, enc_std, latent
I load pretrained weights into the model as well, prior to passing it to captum with the relevant arguments:
ig = IntegratedGradients(model(test_pssm_small, test_len_small, test_FEGS_small))
attr = ig.attribute(test_FEGS_small, n_steps=5)

Dimension error in neural network model for classification

Below is the code for Hierarchical Attention Networks, taken from https://github.com/arunarn2/HierarchicalAttentionNetworks. The only difference in the code on the link and mine is that I have 3 classes for classification, whereas they are using 2
maxlen = 100
max_sentences = 15
max_words = 20000
embedding_dim = 100
validation_split = 0.2
#class defining the custom attention layer
class HierarchicalAttentionNetwork(Layer):
def __init__(self, attention_dim):
self.init = initializers.get('normal')
self.supports_masking = True
self.attention_dim = attention_dim
super(HierarchicalAttentionNetwork, self).__init__()
def build(self, input_shape):
assert len(input_shape) == 3
self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
self.b = K.variable(self.init((self.attention_dim,)))
self.u = K.variable(self.init((self.attention_dim, 1)))
self.trainable_weightss = [self.W, self.b, self.u]
super(HierarchicalAttentionNetwork, self).build(input_shape)
def compute_mask(self, inputs, mask=None):
return mask
def call(self, x, mask=None):
# size of x :[batch_size, sel_len, attention_dim]
# size of u :[batch_size, attention_dim]
# uit = tanh(xW+b)
uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
ait = K.exp(K.squeeze(K.dot(uit, self.u), -1))
if mask is not None:
# Cast the mask to floatX to avoid float64 upcasting
ait *= K.cast(mask, K.floatx())
ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
weighted_input = x * K.expand_dims(ait)
output = K.sum(weighted_input, axis=1)
return output
def compute_output_shape(self, input_shape):
return input_shape[0], input_shape[-1]
# building Hierachical Attention network
embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim))
for word, i in word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
# words not found in embedding index will be all-zeros.
embedding_matrix[i] = embedding_vector
embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
input_length=maxlen, trainable=True, mask_zero=True)
sentence_input = Input(shape=(maxlen,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
lstm_word = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
attn_word = HierarchicalAttentionNetwork(100)(lstm_word)
sentenceEncoder = Model(sentence_input, attn_word)
review_input = Input(shape=(max_sentences, maxlen), dtype='int32')
review_encoder = TimeDistributed(sentenceEncoder)(review_input)
lstm_sentence = Bidirectional(GRU(100, return_sequences=True))(review_encoder)
attn_sentence = HierarchicalAttentionNetwork(100)(lstm_sentence)
preds = Dense(3, activation='softmax')(attn_sentence)
model = Model(review_input, preds)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
print("model fitting - Hierachical attention network")
Following is the error I get. Please help me understand what the error means and how I can possibly resolve it.

Chainer how to save and load DQN model

I'm learning the Deep Reinforcement learning
framework Chainer.
I've followed a tutorial and gotten the following code:
def train_dddqn(env):
class Q_Network(chainer.Chain):
def __init__(self, input_size, hidden_size, output_size):
super(Q_Network, self).__init__(
fc1=L.Linear(input_size, hidden_size),
fc2=L.Linear(hidden_size, hidden_size),
fc3=L.Linear(hidden_size, hidden_size // 2),
fc4=L.Linear(hidden_size, hidden_size // 2),
state_value=L.Linear(hidden_size // 2, 1),
advantage_value=L.Linear(hidden_size // 2, output_size)
)
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
def __call__(self, x):
h = F.relu(self.fc1(x))
h = F.relu(self.fc2(h))
hs = F.relu(self.fc3(h))
ha = F.relu(self.fc4(h))
state_value = self.state_value(hs)
advantage_value = self.advantage_value(ha)
advantage_mean = (F.sum(advantage_value, axis=1) / float(self.output_size)).reshape(-1, 1)
q_value = F.concat([state_value for _ in range(self.output_size)], axis=1) + (
advantage_value - F.concat([advantage_mean for _ in range(self.output_size)], axis=1))
return q_value
def reset(self):
self.cleargrads()
Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
Q_ast = copy.deepcopy(Q)
optimizer = chainer.optimizers.Adam()
optimizer.setup(Q)
epoch_num = 50
step_max = len(env.data) - 1
memory_size = 200
batch_size = 50
epsilon = 1.0
epsilon_decrease = 1e-3
epsilon_min = 0.1
start_reduce_epsilon = 200
train_freq = 10
update_q_freq = 20
gamma = 0.97
show_log_freq = 5
memory = []
total_step = 0
total_rewards = []
total_losses = []
start = time.time()
for epoch in range(epoch_num):
pobs = env.reset()
step = 0
done = False
total_reward = 0
total_loss = 0
while not done and step < step_max:
# select act
pact = np.random.randint(3)
if np.random.rand() > epsilon:
pact = Q(np.array(pobs, dtype=np.float32).reshape(1, -1))
pact = np.argmax(pact.data)
# act
obs, reward, done = env.step(pact)
# add memory
memory.append((pobs, pact, reward, obs, done))
if len(memory) > memory_size:
memory.pop(0)
# train or update q
if len(memory) == memory_size:
if total_step % train_freq == 0:
shuffled_memory = np.random.permutation(memory)
memory_idx = range(len(shuffled_memory))
for i in memory_idx[::batch_size]:
batch = np.array(shuffled_memory[i:i + batch_size])
b_pobs = np.array(batch[:, 0].tolist(), dtype=np.float32).reshape(batch_size, -1)
b_pact = np.array(batch[:, 1].tolist(), dtype=np.int32)
b_reward = np.array(batch[:, 2].tolist(), dtype=np.int32)
b_obs = np.array(batch[:, 3].tolist(), dtype=np.float32).reshape(batch_size, -1)
b_done = np.array(batch[:, 4].tolist(), dtype=np.bool)
q = Q(b_pobs)
indices = np.argmax(q.data, axis=1)
maxqs = Q_ast(b_obs).data
target = copy.deepcopy(q.data)
for j in range(batch_size):
Q.reset()
loss = F.mean_squared_error(q, target)
total_loss += loss.data
loss.backward()
optimizer.update()
if total_step % update_q_freq == 0:
Q_ast = copy.deepcopy(Q)
# epsilon
if epsilon > epsilon_min and total_step > start_reduce_epsilon:
epsilon -= epsilon_decrease
# next step
total_reward += reward
pobs = obs
step += 1
total_step += 1
total_rewards.append(total_reward)
total_losses.append(total_loss)
if (epoch + 1) % show_log_freq == 0:
log_reward = sum(total_rewards[((epoch + 1) - show_log_freq):]) / show_log_freq
log_loss = sum(total_losses[((epoch + 1) - show_log_freq):]) / show_log_freq
elapsed_time = time.time() - start
print('\t'.join(map(str, [epoch + 1, epsilon, total_step, log_reward, log_loss, elapsed_time])))
start = time.time()
return Q, total_losses, total_rewards
Q, total_losses, total_rewards = train_dddqn(Environment1(train))
My question is how can I save and load this Model which has been train very well?I know Kreas has some function like: model.save and load_model.
So what's the specify code I need for this Chainer code?

You can use serializer module to save/load chainer's model's parameter (Chain class).
from chainer import serializers
Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
Q_ast = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
# --- train Q here... ---
# copy Q parameter into Q_ast by saving Q's parameter and load to Q_ast
serializers.save_npz('my.model', Q)
serializers.load_npz('my.model', Q_ast)
See official document for details:
http://docs.chainer.org/en/stable/guides/serializers.html
Also, you may refer chainerrl, which is a chainer library for reinforcement learning.
https://github.com/chainer/chainerrl
chainerrl have a util function copy_param to copy parameter from network source_link to target_link.
https://github.com/chainer/chainerrl/blob/master/chainerrl/misc/copy_param.py#L12-L30

keras change the parameters during training

I have a customized layer to do a simple linear-transformation. like x*w+b. I want to change the w and b during the training, is that possible? For example, I want w1 in the first iteration and w2 in second iteration.(w1 and w2 defined by myself).

Of course, you can do it, but you need to do it in a smart way. Here is some code you can play with.
from keras import backend as K
from keras.layers import *
from keras.models import *
import numpy as np
class MyDense( Layer ) :
def __init__( self, units=64, use_bias=True, **kwargs ) :
super(MyDense, self).__init__( **kwargs )
self.units = units
self.use_bias = use_bias
return
def build( self, input_shape ) :
input_dim = input_shape[-1]
self.count = 0
self.w1 = self.add_weight(shape=(input_dim, self.units), initializer='glorot_uniform', name='w1')
self.w0 = self.add_weight(shape=(input_dim, self.units), initializer='glorot_uniform', name='w0')
if self.use_bias:
self.bias = self.add_weight(shape=(self.units,),initializer='glorot_uniform',name='bias' )
else:
self.bias = None
self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
self.built = True
return
def call( self, x ) :
if self.count % 2 == 1 :
c0, c1 = 0, 1
else :
c0, c1 = 1, 0
w = c0 * self.w0 + c1 * self.w1
self.count += 1
output = K.dot( x, w )
if self.use_bias:
output = K.bias_add(output, self.bias, data_format='channels_last')
return output
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) >= 2
assert input_shape[-1]
output_shape = list(input_shape)
output_shape[-1] = self.units
return tuple(output_shape)
# define a dummy model
x = Input(shape=(128,))
y = MyDense(10)(x)
y = Dense(1, activation='sigmoid')(y)
model = Model(inputs=x, outputs=y)
print model.summary()
# get some dummy data
a = np.random.randn(100,128)
b = (np.random.randn(100,) > 0).astype('int32')
# compile and train
model.compile('adam', 'binary_crossentropy')
model.fit( a, b )
Note: the following code is equivalent to what we did above, but it will NOT work !!!
if self.count % 2 == 1 :
w = self.w0
else :
w = self.w1
Why? Because having zero gradients (the former implementation) for one variable is NOT equivalent to having None gradients (the later implementation).

Develop Reference

ios ruby-on-rails asp.net-mvc docker delphi jenkins grails google-sheets machine-learning dart

Why is my PPO algorithm not learning a simple environment? - machine-learning

Related

Converting generative transformer model from keras to PyTorch

'Not callable' error when calculating integrated gradient interpretability with captum

Dimension error in neural network model for classification

Chainer how to save and load DQN model

keras change the parameters during training

Categories

Resources