How to automatically convert a PyTorch model to a HuggingFace model? - machine-learning

Say I created a Pytorch model:
class Model(nn.Module):
    """Combine two RoBERTa encodings into one non-negative score per row.

    The ``input_ids`` sequence and the ``xyz_ids`` sequence are each encoded
    by their own ``roberta-base`` encoder.  The second encoding is summed over
    dim 0, tiled along dim 0 to match the first, and both are projected to
    512 features before being added together.
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are created in a fixed order so that parameter
        # initialisation draws from the RNG in the same sequence.
        self.model_1_encoder = RobertaModel.from_pretrained('roberta-base')
        self.model_2_encoder = RobertaModel.from_pretrained('roberta-base')
        self.dropout = nn.Dropout(0.5)
        self.linear_model_1_out = nn.Linear(768, 512)
        self.linear_model_2_out = nn.Linear(768, 512)
        self.linear3 = nn.Linear(512, 512)
        self.linear4 = nn.Linear(512, 1)

    def forward(self, x):
        """Run both encoders and fuse their outputs.

        Args:
            x: mapping with the keys ``'input_ids'``, ``'attention_mask'``,
                ``'xyz_ids'`` and ``'xyz_mask'``; every value is passed
                through ``torch.tensor``.

        Returns:
            The ReLU-activated output of ``linear4`` (last dimension 1).
        """
        first_ids = torch.tensor(x['input_ids'])
        first_mask = torch.tensor(x['attention_mask'])
        second_ids = torch.tensor(x['xyz_ids'])
        second_mask = torch.tensor(x['xyz_mask'])

        # Element 0 of each encoder output is used
        # (presumably the last hidden state - TODO confirm).
        first_states = self.model_1_encoder(first_ids, attention_mask=first_mask)[0]
        second_states = self.model_2_encoder(second_ids, attention_mask=second_mask)[0]
        first_states = self.dropout(first_states)
        second_states = self.dropout(second_states)

        # Sum the second encoding over dim 0, keep that axis with size 1,
        # then tile it to the size of the first encoding's dim 0.
        second_pooled = second_states.sum(dim=0, keepdim=True)
        second_pooled = second_pooled.repeat(first_states.shape[0], 1, 1)

        first_proj = self.linear_model_1_out(first_states)
        second_proj = self.linear_model_2_out(second_pooled)

        fused = self.dropout(second_proj + first_proj)
        # Keep only index 0 along dim 1 and flatten to rows of 512 features.
        head_input = fused[:, 0, :].view(-1, 512)
        hidden = self.dropout(torch.relu(self.linear3(head_input)))
        return torch.relu(self.linear4(hidden))
model = Model()
From this SO question I understand that I need to create a config file for the PyTorch model that will be loaded by the model. They referenced How to add a model to Transformers?, which, if I understand correctly, requires manually writing each layer of my custom model in addition to various other inputs and steps. I also found this question on their forum, which referenced this, which had even more steps to complete in order to convert the PyTorch model.
How can I automatically convert my PyTorch model to a Hugging Face model so that I can use their Trainer class (in addition to other functionalities like the DataCollator, etc.)?
Potential optimal pseudocode:
my_pytorch_model = Model()
my_huggingface_model = Custom_huggingface_Model.load_model(my_pytorch_model)

Related

Why is an empty response returned in the "answer" variable?

I have this code, but I can't understand why the answer is always empty for any input text. Thank you very much for the help:
import torch
from transformers import AutoTokenizer, AutoModel
# Load the pre-trained BERT model and its tokenizer.
# NOTE(review): AutoModel presumably loads the bare encoder without a
# question-answering head - confirm against the transformers docs; a
# *ForQuestionAnswering class with a QA-fine-tuned checkpoint is likely
# what this script needs.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")
# Encode the article text and the question separately, each as a "pt" tensor
article_text = "The Sun is a star"
question = "What is The Sun?"
encoded_article_text = tokenizer.encode(article_text, return_tensors="pt")
encoded_question = tokenizer.encode(question, return_tensors="pt")
# Concatenate the article and the question along the last dimension
input_ids = torch.cat([encoded_article_text, encoded_question], dim=-1)
# Predict the start and end position of the answer in the article.
# output[0] / output[1] are treated as start / end logits and cut to the
# article length along dim 1.
# NOTE(review): with a bare encoder these two entries are presumably the
# hidden states and the pooled output rather than logits - verify.
output = model(input_ids)
start_logits = output[0][:, :encoded_article_text.size()[1]]
end_logits = output[1][:, :encoded_article_text.size()[1]]
# Find the answer by selecting the highest scoring start and end position.
# NOTE(review): argmax is called without `dim`, so the result is presumably an
# index into the flattened tensor, not a token position - confirm.
start_index = torch.argmax(start_logits)
end_index = torch.argmax(end_logits)
# If end_index + 1 <= start_index this slice is empty, which gives an empty answer.
answer_tokens = encoded_article_text[0, start_index:end_index+1]
answer = tokenizer.decode(answer_tokens)
print("Answer:", answer)
I tried changing the inputs, using different texts and answers.

How to split laserscan data from lidar into sections and view them on rviz

I was trying to split the laser scan range data into subcategories and would like to publish each category on a different laser topic.
To be more specific, the script should take one topic as input (/scan) and publish three topics: scan1, scan2 and scan3.
Is there a way to split the laser scan, publish the parts back, and view them in rviz?
I tried the following
def callback(laser):
    """Republish the first region of an incoming scan via ``left_fork``.

    ``regions``, ``l`` and ``left_fork`` are module-level objects that are
    defined outside this snippet.
    """
    stamp = rospy.Time.now()

    # Cut the incoming ranges into three named regions.
    # NOTE(review): index 288 is not covered by any of these slices - confirm
    # that the gap between 0:288 and 289:576 is intended.
    regions["l_f_fork"] = laser.ranges[0:288]
    regions["l_f_s"] = laser.ranges[289:576]
    regions["stand"] = laser.ranges[576:864]

    l.header.stamp = stamp
    l.header.frame_id = 'laser'
    # NOTE(review): angle_increment is 0, so every range presumably ends up at
    # the same bearing when displayed - verify against the LaserScan definition.
    scan_fields = (
        ("angle_min", 0),
        ("angle_max", 1.57),
        ("angle_increment", 0),
        ("time_increment", 0),
        ("range_min", 0.0),
        ("range_max", 100.0),
        ("ranges", regions["l_f_fork"]),
        ("intensities", [0]),
    )
    for field_name, field_value in scan_fields:
        setattr(l, field_name, field_value)
    left_fork.publish(l)

    # The other two regions are not published yet:
    # l.ranges = regions["l_f_s"]
    # left_side.publish(l)
    # l.ranges = regions["stand"]
    # left_side.publish(l)
    rospy.loginfo("publishing new info")
I can see the different topics in rviz, but they all lie on the same line.
Tutorial
The following code splits the LaserScan data into three equal sections:
#! /usr/bin/env python3
"""
Program to split LaserScan into three parts.
"""
import rospy
from sensor_msgs.msg import LaserScan
class LaserScanSplit():
    """
    Class for splitting LaserScan into three parts.

    Subscribes to /scan and, on a timer, republishes the most recent scan on
    /scan1, /scan2 and /scan3.  Each output keeps the metadata and length of
    the input; ranges outside that output's third are set to infinity.
    """

    # LaserScan fields copied unchanged from the input to every output scan.
    _COPIED_FIELDS = (
        'header',
        'angle_min',
        'angle_max',
        'angle_increment',
        'time_increment',
        'scan_time',
        'range_min',
        'range_max',
    )

    def __init__(self):
        self.update_rate = 50
        self.freq = 1./self.update_rate

        # Most recent scan; stays None until the first message arrives so the
        # timer callback can tell "no data yet" apart from a real scan.
        self.scan_data = None

        # Subscribers
        rospy.Subscriber("/scan", LaserScan, self.lidar_callback)

        # Publishers
        self.pub1 = rospy.Publisher('/scan1', LaserScan, queue_size=10)
        self.pub2 = rospy.Publisher('/scan2', LaserScan, queue_size=10)
        self.pub3 = rospy.Publisher('/scan3', LaserScan, queue_size=10)

        # Timers
        rospy.Timer(rospy.Duration(self.freq), self.laserscan_split_update)

    def lidar_callback(self, msg):
        """
        Callback function for the Scan topic: remember the latest message.
        """
        self.scan_data = msg

    def laserscan_split_update(self, event):
        """
        Function to update the split scan topics.

        Does nothing while no scan has been received yet.
        """
        # Read the attribute once so the whole update works on the same scan
        # even if lidar_callback replaces it in the meantime.
        source = self.scan_data
        if source is None:
            return

        n = len(source.ranges)
        # Boundaries of the three (nearly) equal parts.
        bounds = (0, n // 3, 2 * n // 3, n)
        publishers = (self.pub1, self.pub2, self.pub3)

        for index, publisher in enumerate(publishers):
            part = LaserScan()
            for field in self._COPIED_FIELDS:
                setattr(part, field, getattr(source, field))

            # Start with all-infinite ranges, then copy only this part's slice.
            start, stop = bounds[index], bounds[index + 1]
            ranges = [float('inf')] * n
            ranges[start:stop] = source.ranges[start:stop]
            part.ranges = ranges

            publisher.publish(part)

    def kill_node(self):
        """
        Function to kill the ROS node
        """
        rospy.signal_shutdown("Done")
if __name__ == '__main__':
rospy.init_node('laserscan_split_node')
LaserScanSplit()
rospy.spin()
The following are screenshots of the robot and obstacles in the environment in Gazebo and RViz:
References:
ROS1 Python Boilerplate
atreus

Get input of fully connected layer of ResNet model during runtime

Found a Solution, left it as an answer to this question down below :)
Info about the project: Classification task with 2 classes.
I am trying to get the output of the fully connected layer of my model for each image I put into the model during runtime. I plan to use them after the model is done training or testing all images to visualize with UMAP.
The model:
#Load resnet
def get_model():
    """Return a pretrained ResNet-50 whose ``fc`` layer is replaced by a 2-output linear layer."""
    resnet = torchvision.models.resnet50(pretrained=True)
    # The new head keeps the input width of the layer it replaces.
    in_features = resnet.fc.in_features
    resnet.fc = nn.Linear(in_features, 2)
    return resnet
The relevant part of pl module:
class classifierModel(pl.LightningModule):
    """Lightning wrapper that runs the given model with a cross-entropy loss."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.learning_rate = 0.0001

    def training_step(self, batch, batch_idx):
        """Return the cross-entropy loss of the model on one batch."""
        images = batch['image']
        targets = batch['targets']
        predictions = self.model(images)
        criterion = nn.CrossEntropyLoss()
        return criterion(predictions, targets)

    def test_step(self, batch, batch_idx):
        """Run the model on one test batch; nothing is returned."""
        images = batch['image']
        targets = batch['targets']
        predictions = self.model(images)
Is it possible to do this by adding an empty list to the init of the pl module and then adding the output after x_hat = model(x) is executed?
How would I know whether, after x_hat = model(x) is executed, the out_features aren't immediately deleted/discarded?
x_hat is this vector and is [batch_size, 2048]. So just modify your training step to also return x_hat.
class classifierModel(pl.LightningModule):
    """Lightning wrapper that also records the model output of every training step."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.learning_rate = 0.0001
        # One entry per training step: the model output for that batch.
        self.fc_outputs = []

    def training_step(self, batch, batch_idx):
        """Return the cross-entropy loss and remember the batch's model output."""
        x = batch['image']
        y = batch['targets']
        x_hat = self.model(x)
        # Store a detached tensor: keeping x_hat itself would keep every
        # step's autograd graph referenced for as long as the list lives.
        self.fc_outputs.append(x_hat.detach())
        output = nn.CrossEntropyLoss()
        loss = output(x_hat, y)
        return loss
The values of x_hat will not be deleted unless you explicitly call del x_hat BEFORE assigning these values elsewhere. In the case where you have already assigned the values of x_hat to another variable (in your case it sounds like you want to append it to a list), the memory addresses associated with the values are not deallocated, because there is still a variable that references these addresses even after the original variable referencing them (x_hat) has been deleted. In this way, Python is relatively safe in terms of memory referencing because it dynamically computes when memory addresses / values are no longer needed at runtime.
I was able to do this using a forward hook on the avgpool layer and saving the output on each test_step, as described here:
#Define Hook:
def get_features(name):
    """Build a forward hook that stores a layer's detached output in ``features[name]``.

    ``features`` is a module-level dict defined outside this function.
    """
    def _store_output(module, inputs, result):
        # Only the layer output is kept; module and inputs are ignored.
        features[name] = result.detach()

    return _store_output
Now when I load my model, I register the hook:
#Load resnet model:
def get_model():
    """Return a pretrained ResNet-50 with a 2-output head and a feature hook on ``avgpool``."""
    resnet = models.resnet50(pretrained=True)
    in_features = resnet.fc.in_features
    resnet.fc = nn.Linear(in_features, 2)
    # register the hook: each forward pass stores the avgpool output under 'feats'
    feature_hook = get_features('feats')
    resnet.avgpool.register_forward_hook(feature_hook)
    return resnet
I did not need to change the init of the PyTorch Lightning model, only the test step function:
FEATS = []
# placeholder for batch features
features = {}
class classifierModel(pl.LightningModule):
    """Lightning wrapper whose test step saves the hooked features of every batch."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.learning_rate = 0.0001

    def test_step(self, batch, batch_idx):
        """Run the model on one batch and append the hooked features to ``FEATS``."""
        images = batch['image']
        targets = batch['targets']
        predictions = self.model(images)
        # The forward pass above refreshed features['feats'] through the hook;
        # save it as a NumPy array.
        batch_features = features['feats'].cpu().numpy()
        FEATS.append(batch_features)
Now we have the output FEATS[0].shape --> (16, 2048, 1, 1), which is what I wanted to get (16 is the batch size I use).

How to get rid of placements(SERVER or CLIENTS) so that I can transform float32#SERVER to float32?

I am trying to do the learning rate decay challenge of the Building Your Own Federated Learning Algorithm tutorial. I have used the following code:
import nest_asyncio
nest_asyncio.apply()
import collections
import attr
import functools
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
np.random.seed(0)
emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data()
NUM_CLIENTS = 10
BATCH_SIZE = 20
initial_lr = 0.01
decay_rate = 0.0005
minimum_lr = initial_lr/2
def preprocess(dataset):
    """Batch ``dataset`` by BATCH_SIZE and reshape every batch.

    Each batch becomes a ``(pixels, label)`` tuple reshaped to ``[-1, 784]``
    and ``[-1, 1]`` respectively.
    """
    def batch_format_fn(element):
        pixels = tf.reshape(element['pixels'], [-1, 784])
        labels = tf.reshape(element['label'], [-1, 1])
        return (pixels, labels)

    batched = dataset.batch(BATCH_SIZE)
    return batched.map(batch_format_fn)
client_ids = np.random.choice(emnist_train.client_ids,
size=NUM_CLIENTS, replace=False)
federated_train_data = [preprocess(emnist_train.create_tf_dataset_for_client(x))
for x in client_ids]
def create_keras_model():
    """Return a Sequential model: 784 inputs -> zero-initialised Dense(10) -> Softmax."""
    layers = [
        tf.keras.layers.InputLayer(input_shape=(784,)),
        tf.keras.layers.Dense(10, kernel_initializer='zeros'),
        tf.keras.layers.Softmax(),
    ]
    return tf.keras.models.Sequential(layers)
def model_fn():
    """Wrap a freshly created Keras model as a TFF learning model.

    The input spec is taken from the first client dataset in
    ``federated_train_data``.
    """
    return tff.learning.from_keras_model(
        create_keras_model(),
        input_spec=federated_train_data[0].element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
@tf.function
def client_update(model, dataset, server_weights, client_optimizer):
    """Train ``model`` on ``dataset`` starting from ``server_weights``.

    Returns the model's trainable variables after one pass over the dataset.
    """
    # Start the client model from the current server weights.
    client_weights = model.trainable_variables
    tf.nest.map_structure(lambda x, y: x.assign(y),
                          client_weights, server_weights)

    for batch in dataset:
        with tf.GradientTape() as tape:
            outputs = model.forward_pass(batch)
        grads = tape.gradient(outputs.loss, client_weights)
        # Clip the gradients to a global norm of 5.0 before applying them.
        grads = tf.clip_by_global_norm(grads, 5.0)[0]
        grads_and_vars = zip(grads, client_weights)
        client_optimizer.apply_gradients(grads_and_vars)

    return client_weights
@tf.function
def server_update(model, mean_client_weights):
    """Assign ``mean_client_weights`` to ``model`` and return its trainable variables."""
    model_weights = model.trainable_variables
    tf.nest.map_structure(lambda x, y: x.assign(y),
                          model_weights, mean_client_weights)
    return model_weights
@tff.tf_computation
def server_init():
    """Return the trainable variables of a freshly built model."""
    model = model_fn()
    return model.trainable_variables
@tff.federated_computation
def initialize_fn():
    """Return the initial server state: [model weights, learning rate], each placed at SERVER."""
    # Alternative with a single placement around the whole list:
    #return tff.federated_value([server_init(),initial_lr], tff.SERVER)
    return [tff.federated_value(server_init(), tff.SERVER), tff.federated_value(initial_lr, tff.SERVER)]
whimsy_model = model_fn()
tf_dataset_type = tff.SequenceType(whimsy_model.input_spec)
str(tf_dataset_type)
model_weights_type = server_init.type_signature.result
str(model_weights_type)
@tff.tf_computation(tf_dataset_type, model_weights_type, tf.float32)
def client_update_fn(tf_dataset, server_weights, LR):
    """Run ``client_update`` on a fresh model with an SGD optimizer using learning rate ``LR``."""
    model = model_fn()
    client_optimizer = tf.keras.optimizers.SGD(learning_rate=LR)
    return client_update(model, tf_dataset, server_weights, client_optimizer)
@tff.tf_computation(model_weights_type)
def server_update_fn(mean_client_weights):
    """Run ``server_update`` on a fresh model with the averaged client weights."""
    model = model_fn()
    return server_update(model, mean_client_weights)
federated_server_type = tff.FederatedType(model_weights_type,
tff.SERVER)
federated_dataset_type = tff.FederatedType(tf_dataset_type,
tff.CLIENTS)
#federated_server_type_with_LR = tff.FederatedType([model_weights_type,tff.to_type((tf.float32))],tff.SERVER)
federated_server_type_with_LR = [tff.FederatedType(model_weights_type,tff.SERVER),
tff.FederatedType(tff.to_type((tf.float32)),tff.SERVER)]
@tf.function
def decay_lr(lr):
    """Return ``lr - decay_rate`` while that stays above ``minimum_lr``, else ``minimum_lr``."""
    if lr - decay_rate > minimum_lr:
        return lr - decay_rate
    else:
        return minimum_lr
@tff.tf_computation(tf.float32)
def decay_lr_fn(lr):
    """TFF wrapper around ``decay_lr`` for a float32 learning rate."""
    return decay_lr(lr)
@tff.federated_computation(federated_server_type_with_LR, federated_dataset_type)
def next_fn(server_weights_and_LR, federated_dataset):
    """Run one federated round with a decayed learning rate.

    Args:
        server_weights_and_LR: pair of (server weights, learning rate).
        federated_dataset: the client datasets.

    Returns:
        ``[server_weights, LR_NEW]``: the updated server weights and the
        decayed learning rate.
    """
    server_weights = server_weights_and_LR[0]
    #LR_SERVER = server_weights_and_LR[1]
    #LR_CLIENTS = tff.federated_broadcast(server_weights_and_LR[1])
    LR = server_weights_and_LR[1]

    # Decay the learning rate on the server, then send it to the clients.
    LR_NEW = tff.federated_map(decay_lr_fn, LR)
    LR_NEW_CLIENTS = tff.federated_broadcast(LR_NEW)

    # Broadcast the server weights to the clients
    server_weights_at_client = tff.federated_broadcast(server_weights)

    # Each client computes their updated weights
    client_weights = tff.federated_map(
        client_update_fn, (federated_dataset, server_weights_at_client, LR_NEW_CLIENTS))

    # The server averages the client weights
    mean_client_weights = tff.federated_mean(client_weights)

    # The server update
    server_weights = tff.federated_map(server_update_fn, mean_client_weights)

    #return server_weights_and_LR
    return [server_weights, LR_NEW]
federated_algorithm = tff.templates.IterativeProcess(
initialize_fn=initialize_fn,
next_fn=next_fn)
sorted_client_ids = sorted(emnist_test.client_ids)
sorted_client_ids2 = sorted_client_ids[0:100]
def data(client, source=emnist_test):
    """Return the preprocessed dataset of ``client`` taken from ``source``."""
    client_dataset = source.create_tf_dataset_for_client(client)
    return preprocess(client_dataset)
central_emnist_test = (tf.data.Dataset.from_tensor_slices(
[data(client) for client in sorted_client_ids2])).flat_map(lambda x: x)
def evaluate(server_state):
    """Load ``server_state`` into a fresh Keras model and evaluate it on ``central_emnist_test``."""
    net = create_keras_model()
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
    net.compile(loss=loss_fn, metrics=[accuracy])
    # The weights come from the federated server state, not from training here.
    net.set_weights(server_state)
    net.evaluate(central_emnist_test)
server_state = federated_algorithm.initialize()
evaluate(server_state[0])
for round in range(15):
print(round)
#server_state_temp = federated_algorithm.next(server_state, federated_train_data)
#server_state = [server_state_temp[0], decaying_lr(round)]
server_state = federated_algorithm.next(server_state, federated_train_data)
print(server_state[1])
evaluate(server_state[0])
This code works just fine, but I want to add the learning rate definition to the server_init() function. So basically I want to have the following:
@tff.tf_computation
def server_init():
    """Return the trainable variables of a fresh model together with the initial learning rate."""
    model = model_fn()
    return [model.trainable_variables, initial_lr]
@tff.federated_computation
def initialize_fn():
    """Return the result of ``server_init()`` as one value placed at SERVER."""
    return tff.federated_value(server_init(), tff.SERVER)
But doing so leads to the following problem:
The return type of `initialize_fn` must be assignable to the first input argument of `next_fn`, but:
`initialize_fn` returned type:
<<float32[784,10],float32[10]>,float32>#SERVER
and the first input argument of `next_fn` is:
<server_weights_and_LR=<<float32[784,10],float32[10]>#SERVER,float32#SERVER>,federated_dataset={<float32[?,784],int32[?,1]>*}#CLIENTS>
The problem is that the return [server_weights, LR_NEW] statement at the end of next_fn() has the type <<float32[784,10],float32[10]>#SERVER,float32#SERVER>. Both server_weights and LR_NEW already have the #SERVER placement. Currently
@tff.tf_computation
def server_init():
    """Return the trainable variables of a freshly built model."""
    model = model_fn()
    return model.trainable_variables
@tff.federated_computation
def initialize_fn():
    """Return [model weights, learning rate], each placed at SERVER separately."""
    return [tff.federated_value(server_init(), tff.SERVER), tff.federated_value(initial_lr, tff.SERVER)]
also returns <<float32[784,10],float32[10]>#SERVER,float32#SERVER>
But as I said, I want to change that part. To do that, I want to remove the placements of server_weights and LR_NEW in next_fn and apply a placement to the list containing both of them. How can I do that?
Also does anyone have a "cleaner" solution to that challenge?
EDIT:
I just want to clarify that the input-output match for initialize and next is "cyclic": we seek a match between the output of initialize and the input of next, but also one between the output of next and its own input argument.
The first return argument of `next_fn` must be assignable to its first input argument, but found
`next_fn` which returns type:
<<float32[784,10],float32[10]>#SERVER,float32#SERVER>
which does not match its first input argument:
<<float32[784,10],float32[10]>,float32>#SERVER
The problem in your code is when manually creating federated_server_type_with_LR.
In the type system, <A#SERVER, B#SERVER> is different from <A, B>#SERVER. You can convert the former to the latter by using tff.federated_zip(), which promotes the placement to the top level.
Two solutions:
(1) Modify the decorator of next_fn to be @tff.federated_computation(tff.federated_zip(federated_server_type_with_LR), federated_dataset_type)
(2) [preferred, to avoid this kind of issue] Do not create the type manually, and read it from initialize_fn instead. The decorator would be @tff.federated_computation(initialize_fn.type_signature.result, federated_dataset_type)

How can I construct function for client selection?

I am trying to customize the averaging of the client weights by selecting some of the clients based on each client's sorted loss sum, in this link.
def run_one_round(server_state, federated_dataset):
    """Run one federated round in which ``selecting_fn`` produces the model delta.

    Returns the updated server state and the weighted mean of the clients'
    ``model_output``.
    """
    # Server -> clients: derive the message and broadcast it.
    message_at_server = tff.federated_map(server_message_fn, server_state)
    message_at_clients = tff.federated_broadcast(message_at_server)

    # Every client trains on its own dataset.
    client_outputs = tff.federated_map(
        client_update_fn, (federated_dataset, message_at_clients))
    weight_denom = client_outputs.client_weight

    # Collect the client outputs and let selecting_fn produce the delta.
    collected_output = tff.federated_collect(client_outputs)  # append
    round_model_delta = tff.federated_map(
        selecting_fn, (collected_output, weight_denom))  # apppend

    updated_state = tff.federated_map(
        server_update_fn, (server_state, round_model_delta))
    round_loss_metric = tff.federated_mean(
        client_outputs.model_output, weight=weight_denom)
    return updated_state, round_loss_metric  # append
# NOTE(review): the decorator is given no parameter types although the function
# takes two arguments - presumably TFF needs them here; confirm.
@tff.tf_computation()  # append
def selecting_fn(collected_output, weight_denom):
    """Select among the collected client outputs and return the round's model delta.

    The body is elided in the question; only the intended return value is shown.
    """
    ...
    ...
    return round_model_delta
I'm trying to use tf.math.top_k for sorting and tf.compat.v1.metrics.mean for averaging.
But it doesn't work (TypeError, ValueError, ...).
How can I construct selecting_fn, and how do I convert a tensor to a FederatedType?

Resources