Removing some layers from a pre-trained model in PyTorch

I need to use a PyTorch pre-trained model, but I want to remove some of its layers. This is the original model:
def forward_features(self, x):
    x = self.patch_embed1(x)
    x = self.pos_drop(x)
    for i, blk in enumerate(self.blocks1):
        if self.use_checkpoint and i < self.checkpoint_num[0]:
            x = checkpoint.checkpoint(blk, x)
        else:
            x = blk(x)
    x = self.patch_embed2(x)
    for i, blk in enumerate(self.blocks2):
        if self.use_checkpoint and i < self.checkpoint_num[1]:
            x = checkpoint.checkpoint(blk, x)
        else:
            x = blk(x)
    x = self.patch_embed3(x)
    for i, blk in enumerate(self.blocks3):
        if self.use_checkpoint and i < self.checkpoint_num[2]:
            x = checkpoint.checkpoint(blk, x)
        else:
            x = blk(x)
    x = self.patch_embed4(x)
    for i, blk in enumerate(self.blocks4):
        if self.use_checkpoint and i < self.checkpoint_num[3]:
            x = checkpoint.checkpoint(blk, x)
        else:
            x = blk(x)
    x = self.norm(x)
    x = self.pre_logits(x)
    return x

def forward(self, x):
    x = x[0]  # (2)
    x = self.forward_features(x)
    x = x.flatten(2).mean(-1)
    x = self.head(x)
    return x
I want to change the model in this manner:
def forward_features(self, x):
    x = self.patch_embed1(x)
    x = self.pos_drop(x)
    for i, blk in enumerate(self.blocks1):
        if self.use_checkpoint and i < self.checkpoint_num[0]:
            features1 = checkpoint.checkpoint(blk, x)
        else:
            features1 = blk(x)
    y_2 = self.patch_embed2(features1)
    for i, blk in enumerate(self.blocks2):
        if self.use_checkpoint and i < self.checkpoint_num[1]:
            features2 = checkpoint.checkpoint(blk, y_2)
        else:
            features2 = blk(y_2)
    y_3 = self.patch_embed3(features2)
    for i, blk in enumerate(self.blocks3):
        if self.use_checkpoint and i < self.checkpoint_num[2]:
            features3 = checkpoint.checkpoint(blk, y_3)
        else:
            features3 = blk(y_3)
    y_4 = self.patch_embed4(features3)
    for i, blk in enumerate(self.blocks4):
        if self.use_checkpoint and i < self.checkpoint_num[3]:
            features4 = checkpoint.checkpoint(blk, y_4)
        else:
            features4 = blk(y_4)
    return x, features1, features2, features3, features4

def forward(self, x):
    x, features1, features2, features3, features4 = self.forward_features(x)
    return features1, features2, features3, features4
How can I make these changes without running into a mismatch between the pretrained weights and the model? Right now, PyTorch rejects my modifications when it compares the model against the weights.
Traceback of the error:
Traceback (most recent call last):
  File "/home/user/code/train.py", line 370, in <module>
    main()
  File "/home/user/code/train.py", line 101, in main
    model = modelName(pretrained=encoder_pretrained)
  File "/home/user/code/models/model.py", line 72, in __init__
    assert list_model_dict[i][1].shape==list_weight_dict[i][1].shape
AssertionError
The error occurs in the following part:
if pretrained:
    print('Loading weights...')
    weight_dict = torch.load(os.path.join('models', 'uniformer_small_k400_16x4.pth'))
    model_dict = self.featureExtractor.state_dict()
    list_model_dict = list(model_dict.items())
    list_weight_dict = list(weight_dict.items())
    for i in range(len(list_model_dict)):
        assert list_model_dict[i][1].shape == list_weight_dict[i][1].shape  # line 72 in the traceback
        model_dict[list_model_dict[i][0]].copy_(weight_dict[list_weight_dict[i][0]])
    for i in range(len(list_model_dict)):
        assert torch.all(torch.eq(model_dict[list_model_dict[i][0]], weight_dict[list_weight_dict[i][0]].to('cpu')))
    print('Loading done!')
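Changing forward_features and forward does not change the model's parameters, so the state_dict keys and shapes stay the same; what breaks is pairing the two dictionaries by position when their entries differ in number or order. Below is a minimal sketch (not part of the original post) that matches checkpoint entries by key name and shape and lets load_state_dict skip the rest:

import os
import torch

weight_dict = torch.load(os.path.join('models', 'uniformer_small_k400_16x4.pth'),
                         map_location='cpu')
# If the checkpoint nests its tensors under a wrapper key such as 'model',
# unwrap it first (this depends on how the file was saved).
model_dict = self.featureExtractor.state_dict()

# Keep only the entries whose name and shape both match the current model.
filtered = {k: v for k, v in weight_dict.items()
            if k in model_dict and v.shape == model_dict[k].shape}

# strict=False loads the matching tensors and reports the rest instead of raising.
missing, unexpected = self.featureExtractor.load_state_dict(filtered, strict=False)
print(f'loaded {len(filtered)} tensors, missing {len(missing)}, unexpected {len(unexpected)}')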

Related

Converting generative transformer model from keras to PyTorch

I would like to re-create the following Keras model in PyTorch.
vocab_size = 22
maxlen = 200
embed_dim = 256
num_heads = 2
feed_forward_dim = 256
batch_size = 128
decoders = 5

def create_model():
    inputs = layers.Input(shape=(maxlen,), dtype=tf.int32)
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    decoder_blocks = []
    for i in range(decoders):
        decoder_blocks.append(DecoderBlock(embed_dim, num_heads, feed_forward_dim))
    for i in range(len(decoder_blocks)):
        x = decoder_blocks[i](x)
    outputs = layers.Dense(vocab_size)(x)
    model = keras.Model(inputs=inputs, outputs=[outputs, x])
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
        loss=[loss_fn, None],
    )
    return model

model = create_model()
Here are the DecoderBlock and TokenAndPositionEmbedding layers, along with the causal attention mask:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    i = tf.range(n_dest)[:, None]
    j = tf.range(n_src)
    m = i >= j - n_src + n_dest
    mask = tf.cast(m, dtype)
    mask = tf.reshape(mask, [1, n_dest, n_src])
    mult = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(mask, mult)

class DecoderBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(DecoderBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, tf.bool)
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions
For reference, this code is copied directly from: https://keras.io/examples/generative/text_generation_with_miniature_gpt/
I have tried to create an equivalent architecture in PyTorch using nn.TransformerDecoderLayer. Apologies for not including my own code, but I have been completely unsuccessful.
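For what it's worth, here is a rough PyTorch sketch of the same idea (an untested attempt of mine, not a parameter-for-parameter port: Keras's MultiHeadAttention takes a per-head key_dim, while PyTorch splits embed_dim across heads). Since the Keras DecoderBlock has no cross-attention, nn.TransformerEncoderLayer with a causal mask is a closer match than nn.TransformerDecoderLayer:

import torch
import torch.nn as nn

class MiniGPT(nn.Module):
    def __init__(self, vocab_size=22, maxlen=200, embed_dim=256,
                 num_heads=2, ff_dim=256, num_layers=5, dropout=0.1):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, embed_dim)
        self.pos_emb = nn.Embedding(maxlen, embed_dim)
        # Self-attention only (GPT style), post-layer-norm like the Keras block.
        block = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads,
                                           dim_feedforward=ff_dim, dropout=dropout,
                                           batch_first=True)
        self.blocks = nn.TransformerEncoder(block, num_layers=num_layers)
        self.head = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):                          # x: (batch, seq_len) token ids
        seq_len = x.size(1)
        positions = torch.arange(seq_len, device=x.device)
        h = self.token_emb(x) + self.pos_emb(positions)
        # Additive causal mask: 0 on/below the diagonal, -inf above it.
        causal = torch.triu(torch.full((seq_len, seq_len), float('-inf'),
                                       device=x.device), diagonal=1)
        h = self.blocks(h, mask=causal)
        return self.head(h)                        # (batch, seq_len, vocab_size) logits

model = MiniGPT()
logits = model(torch.randint(0, 22, (4, 200)))
# Training would pair these logits with nn.CrossEntropyLoss on shifted targets,
# the PyTorch analogue of SparseCategoricalCrossentropy(from_logits=True).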

AttributeError: 'Model' object has no attribute '_backward_hooks'

Trying to implement the research paper:
https://ieeexplore.ieee.org/document/9479786/
Training a Monotone Network with the architecture:
class Model(nn.Module):
    def __init__(self, q, s):
        self.layer_s_list = [nn.Linear(5, s) for _ in range(q)]
        self.inv_w, self.inv_b = self.get_layer_weights()

    def forward(self, x):
        # print(inv_w[0].shape, inv_b[0].shape)
        output_lst = []
        for layer in self.layer_s_list:
            v, id = torch.max(layer(x), 1)
            output_lst.append(v.detach().numpy())
        output_lst = np.array(output_lst)
        output_lst = torch.from_numpy(output_lst)
        out, _ = torch.min(output_lst, 0)
        allo_out = F.softmax(out)
        pay_out = nn.ReLU(inplace=True)(out)
        inv_out_lst = []
        for q_idx in range(len(self.inv_w)):
            # print(inv_w[q_idx].shape, pay_out.shape, inv_b[q_idx].shape)
            y, _ = torch.min(torch.linalg.pinv(self.inv_w[q_idx]) * (pay_out - self.inv_b[q_idx]), 0)
            inv_out_lst.append(y.detach().numpy())
        final_out = np.array(inv_out_lst)
        final_out = torch.from_numpy(final_out)
        final_out, _ = torch.max(final_out, 1)
        return final_out, allo_out

    def get_layer_weights(self):
        weights_lst = []
        bias_lst = []
        for layer in self.layer_s_list:
            weights_lst.append(layer.state_dict()['weight'])
            bias_lst.append(layer.state_dict()['bias'])
        return weights_lst, bias_lst
When I initialise the network and run it on random inputs:
q = 5
s = 10
x = torch.rand((10, 5), requires_grad=True)
net = Model(q, s)
y, z = net(x)
It gives the following error:
AttributeError                            Traceback (most recent call last)
<ipython-input-3-aac6d239df1f> in <module>
      1 x = torch.rand((10, 5), requires_grad = True)
      2 net = Model(5, 10)
----> 3 y = net(x)

1 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
   1206             return modules[name]
   1207         raise AttributeError("'{}' object has no attribute '{}'".format(
-> 1208             type(self).__name__, name))
   1209
   1210     def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:

AttributeError: 'Model' object has no attribute '_backward_hooks'
Please help me understand what this error is and how to fix it.
You forgot to initialize your model with the __init__ method of its parent class, nn.Module. A simple solution is to add one line to Model.__init__:
class Model(nn.Module):
    def __init__(self, q, s):
        super(Model, self).__init__()
        self.layer_s_list = [nn.Linear(5, s) for _ in range(q)]
        self.inv_w, self.inv_b = self.get_layer_weights()
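As a side note (not part of the original answer): because layer_s_list is a plain Python list, nn.Module will not register the Linear layers as submodules, so their weights will not appear in net.parameters() or move with net.to(device). Wrapping them in nn.ModuleList avoids that; a small sketch, with the rest of the class unchanged:

import torch.nn as nn

class Model(nn.Module):
    def __init__(self, q, s):
        super().__init__()
        # nn.ModuleList registers each Linear, so its parameters are visible
        # to optimizers and to net.to(device) / net.state_dict().
        self.layer_s_list = nn.ModuleList([nn.Linear(5, s) for _ in range(q)])
        self.inv_w, self.inv_b = self.get_layer_weights()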

Keras Error TypeError: ('Keyword argument not understood:', 'mode')

I am using the 100 Layers Tiramisu code and I am getting this error. I know it is probably because of version changes in Keras, but I am not sure how to fix it.
I have changed the old merge method to keras.layers.concatenate, but it still gives the same error.
def relu(x): return Activation('relu')(x)
def dropout(x, p): return Dropout(p)(x) if p else x
def bn(x): return BatchNormalization(mode=2, axis=-1)(x)
def relu_bn(x): return relu(bn(x))
def concat(xs): return keras.layers.Concatenate(xs, mode='concat', concat_axis=-1)

def conv(x, nf, sz, wd, p, stride=1):
    # x = Convolution2D(nf, sz, sz, init='he_uniform', border_mode='same',
    #                   subsample=(stride,stride), W_regularizer=regularizers.l1_l2(wd))(x)
    x = Convolution2D(nf, (sz, sz), padding='same',
                      strides=(stride, stride), kernel_regularizer=regularizers.l1_l2(wd))(x)
    return dropout(x, p)

def down_path(x, nb_layers, growth_rate, p, wd):
    skips = []
    for i, n in enumerate(nb_layers):
        x, added = dense_block(n, x, growth_rate, p, wd)
        skips.append(x)
        x = transition_dn(x, p=p, wd=wd)
    return skips, added

def transition_up(added, wd=0):
    x = concat(added)
    _, r, c, ch = x.get_shape().as_list()
    # W_regularizer=l2(wd))(x)   <- leftover fragment of the old Keras 1 call
    return Deconvolution2D(ch, (3, 3), (None, r*2, c*2, ch),
                           padding='same', stride=(2, 2), kernel_regularizer=l2(wd))(x)

def up_path(added, skips, nb_layers, growth_rate, p, wd):
    for i, n in enumerate(nb_layers):
        x = transition_up(added, wd)
        x = concat([x, skips[i]])
        x, added = dense_block(n, x, growth_rate, p, wd)
    return x

def reverse(a): return list(reversed(a))

def create_tiramisu(nb_classes, img_input, nb_dense_block=6,
                    growth_rate=16, nb_filter=48, nb_layers_per_block=5, p=None, wd=0):
    if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
        nb_layers = list(nb_layers_per_block)
    else:
        nb_layers = [nb_layers_per_block] * nb_dense_block
    x = conv(img_input, nb_filter, 3, wd, 0)
    skips, added = down_path(x, nb_layers, growth_rate, p, wd)
    x = up_path(added, reverse(skips[:-1]), reverse(nb_layers[:-1]), growth_rate, p, wd)
    x = conv(x, nb_classes, 1, wd, 0)
    _, r, c, f = x.get_shape().as_list()
    x = Reshape((-1, nb_classes))(x)
    return Activation('softmax')(x)

input_shape = (224, 224, 3)
img_input = Input(shape=input_shape)
x = create_tiramisu(32, img_input, nb_layers_per_block=[4,5,7,10,12,15], p=0.2, wd=1e-4)
The error I am getting is:
TypeError                                 Traceback (most recent call last)
<ipython-input-80-acecdf7dd0b2> in <module>()
      1 input_shape = (224,224,3)
      2 img_input = Input(shape=input_shape)
----> 3 x = create_tiramisu(32, img_input, nb_layers_per_block=[4,5,7,10,12,15], p=0.2, wd=1e-4)

10 frames
/usr/local/lib/python3.7/dist-packages/keras/utils/generic_utils.py in validate_kwargs(kwargs, allowed_kwargs, error_message)
   1172   for kwarg in kwargs:
   1173     if kwarg not in allowed_kwargs:
-> 1174       raise TypeError(error_message, kwarg)
   1175
   1176

TypeError: ('Keyword argument not understood:', 'mode')
I have tried to change a few arguments that changed between Keras versions, but I still get the same error.
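The mode and concat_axis keywords come from the Keras 1 API and were removed in Keras 2 / tf.keras, which is what the "Keyword argument not understood" error is pointing at. Here is a minimal sketch (my guess at the intent, not tested against the rest of the notebook) of how those two helpers might be written with the current API:

from tensorflow.keras import layers

def bn(x):
    # Keras 2 has no `mode` argument; train/inference behaviour is handled automatically.
    return layers.BatchNormalization(axis=-1)(x)

def concat(xs):
    # Keras 2 uses `axis` instead of `mode='concat'` / `concat_axis`.
    return layers.concatenate(xs, axis=-1)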

dropout(): argument 'input' (position 1) must be Tensor, not tuple when using XLNet with Hugging Face

I get an error saying that the input should be of type Tensor, not tuple. I do not know how to work around this problem, as I am already passing return_dict=False as described in the migration guide.
My model is as follows:
class XLNetClassifier(torch.nn.Module):
    def __init__(self, dropout_rate=0.1):
        super(XLNetClassifier, self).__init__()
        self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased', return_dict=False)
        self.d1 = torch.nn.Dropout(dropout_rate)
        self.l1 = torch.nn.Linear(768, 64)
        self.bn1 = torch.nn.LayerNorm(64)
        self.d2 = torch.nn.Dropout(dropout_rate)
        self.l2 = torch.nn.Linear(64, 3)

    def forward(self, input_ids, attention_mask):
        x = self.XLNet(input_ids=input_ids, attention_masks=attention_mask)
        x = self.d1(x)
        x = self.l1(x)
        x = self.bn1(x)
        x = torch.nn.Tanh()(x)
        x = self.d2(x)
        x = self.l2(x)
        return x
The error occurs when calling the dropout.
The XLNetModel returns two output values:
last_hidden_state
mems
That means you get a tuple and not a single tensor as the error message says. Your class definition should therefore be:
from transformers import XLNetModel, XLNetTokenizerFast
import torch

class XLNetClassifier(torch.nn.Module):
    def __init__(self, dropout_rate=0.1):
        super(XLNetClassifier, self).__init__()
        self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased', return_dict=False)
        self.d1 = torch.nn.Dropout(dropout_rate)
        self.l1 = torch.nn.Linear(768, 64)
        self.bn1 = torch.nn.LayerNorm(64)
        self.d2 = torch.nn.Dropout(dropout_rate)
        self.l2 = torch.nn.Linear(64, 3)

    def forward(self, input_ids, attention_mask):
        x = self.XLNet(input_ids=input_ids, attention_masks=attention_mask)
        x = self.d1(x[0])
        x = self.l1(x)
        x = self.bn1(x)
        x = torch.nn.Tanh()(x)
        x = self.d2(x)
        x = self.l2(x)
        return x

tokenizer = XLNetTokenizerFast.from_pretrained('xlnet-base-cased')
model = XLNetClassifier()
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt", return_token_type_ids=False)
outputs = model(**inputs)
Or, even better, without return_dict=False:
class XLNetClassifier(torch.nn.Module):
    def __init__(self, dropout_rate=0.1):
        super(XLNetClassifier, self).__init__()
        self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased')
        self.d1 = torch.nn.Dropout(dropout_rate)
        self.l1 = torch.nn.Linear(768, 64)
        self.bn1 = torch.nn.LayerNorm(64)
        self.d2 = torch.nn.Dropout(dropout_rate)
        self.l2 = torch.nn.Linear(64, 3)

    def forward(self, input_ids, attention_mask):
        x = self.XLNet(input_ids=input_ids, attention_masks=attention_mask)
        x = self.d1(x.last_hidden_state)
        x = self.l1(x)
        x = self.bn1(x)
        x = torch.nn.Tanh()(x)
        x = self.d2(x)
        x = self.l2(x)
        return x

Chainer Autoencoder

I am trying to write a vanilla autoencoder for compressing 13 images. However, I am getting the following error:
ValueError: train argument is not supported anymore. Use chainer.using_config
The shape of the images is (21, 28, 3).
filelist = 'ex1.png', 'ex2.png', ...  # plus 11 other images
x = np.array([np.array(Image.open(fname)) for fname in filelist])
xs = x.astype('float32')/255.

class Autoencoder(Chain):
    def __init__(self, activation=F.relu):
        super().__init__()
        with self.init_scope():
            # encoder part
            self.l1 = L.Linear(1764, 800)
            self.l2 = L.Linear(800, 300)
            # decoder part
            self.l3 = L.Linear(300, 800)
            self.l4 = L.Linear(800, 1764)
        self.activation = activation

    def forward(self, x):
        h = self.encode(x)
        x_recon = self.decode(h)
        return x_recon

    def __call__(self, x):
        x_recon = self.forward(x)
        loss = F.mean_squared_error(h, x)
        return loss

    def encode(self, x, train=True):
        h = F.dropout(self.activation(self.l1(x)), train=train)
        return self.activation(self.l2(x))

    def decode(self, h, train=True):
        h = self.activation(self.l3(h))
        return self.l4(x)

n_epoch = 5
batch_size = 2
model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs, batch_size)
valid_iter = iterators.SerialIterator(xs, batch_size)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (n_epoch, "epoch"), out="result")

from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
trainer.run()
Is the issue due to the number of nodes in the model, or something else?
You need to write the "decoder" part.
When you take the mean_squared_error loss, the shapes of h and x must be the same.
An autoencoder encodes the original x into a small (e.g. 100-dim) representation h, and then we need to reconstruct x' from this h with the decoder part.
The loss can then be calculated on this reconstructed x'.
For example, as follows (sorry, I have not tested that it runs):
For Chainer v2 and later
The train argument is handled by the global config, so you do not need the train argument in the dropout function.
class Autoencoder(Chain):
    def __init__(self, activation=F.relu):
        super().__init__()
        with self.init_scope():
            # encoder part
            self.l1 = L.Linear(1308608, 500)
            self.l2 = L.Linear(500, 100)
            # decoder part
            self.l3 = L.Linear(100, 500)
            self.l4 = L.Linear(500, 1308608)
        self.activation = activation

    def forward(self, x):
        h = self.encode(x)
        x_recon = self.decode(h)
        return x_recon

    def __call__(self, x):
        x_recon = self.forward(x)
        # compute the loss between the reconstruction and the original input
        loss = F.mean_squared_error(x_recon, x)
        return loss

    def encode(self, x):
        h = F.dropout(self.activation(self.l1(x)))
        return self.activation(self.l2(h))

    def decode(self, h):
        h = self.activation(self.l3(h))
        return self.l4(h)
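In v2 the dropout behaviour is read from chainer.config.train (True by default), so at evaluation time you switch it off with chainer.using_config instead of a train argument. A small illustrative snippet (xs_batch is just a placeholder name for a minibatch array):

import chainer

# Training: chainer.config.train is True by default, so F.dropout is active.
loss = model(xs_batch)

# Evaluation: temporarily disable dropout and other train-only behaviour.
with chainer.using_config('train', False):
    x_recon = model.forward(xs_batch)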
For Chainer v1
class Autoencoder(Chain):
    def __init__(self, activation=F.relu):
        super().__init__()
        with self.init_scope():
            # encoder part
            self.l1 = L.Linear(1308608, 500)
            self.l2 = L.Linear(500, 100)
            # decoder part
            self.l3 = L.Linear(100, 500)
            self.l4 = L.Linear(500, 1308608)
        self.activation = activation

    def forward(self, x):
        h = self.encode(x)
        x_recon = self.decode(h)
        return x_recon

    def __call__(self, x):
        x_recon = self.forward(x)
        # compute the loss between the reconstruction and the original input
        loss = F.mean_squared_error(x_recon, x)
        return loss

    def encode(self, x, train=True):
        h = F.dropout(self.activation(self.l1(x)), train=train)
        return self.activation(self.l2(h))

    def decode(self, h, train=True):
        h = self.activation(self.l3(h))
        return self.l4(h)
You can also refer to the official Variational Autoencoder example for the next step:
https://github.com/chainer/chainer/tree/master/examples/vae
