Related
I have this model:
# Unary CNN classifier, assembled layer by layer.
# Per-sample input is (32, 640, 3), so the model consumes 4-D batches of
# shape (batch, 32, 640, 3); the final layer emits a 4-way softmax.
unary = Sequential()

# Stage 1: two 32-filter 3x3 convolutions followed by 2x2 max pooling.
unary.add(Conv2D(32, (3, 3), activation="relu", input_shape=(32, 640, 3)))
unary.add(Conv2D(32, (3, 3), activation="relu"))
unary.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2: two 64-filter 3x3 convolutions followed by 2x2 max pooling.
unary.add(Conv2D(64, (3, 3), activation="relu"))
unary.add(Conv2D(64, (3, 3), activation="relu"))
unary.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 3: widen to 128 and then 256 filters, with no pooling afterwards.
unary.add(Conv2D(128, (3, 3), activation="relu"))
unary.add(Conv2D(256, (3, 3), activation="relu"))

# Classifier head: flatten, one hidden dense layer, 4-class softmax output.
unary.add(Flatten())
unary.add(Dense(1024, activation="relu"))
unary.add(Dense(4, activation="softmax"))

unary.summary()
When I try to use it to predict for further classification, I get this error:
ValueError: Input 0 of layer sequential_15 is incompatible with the layer: expected axis -1 of input shape to have value 3 but received input with shape (None, 32, 640, 3, 1)
Full Error Traceback:
--------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_23/1300371096.py in <module>
----> 1 x_train, y_train = get_crf_training_data()
/tmp/ipykernel_23/2273784861.py in get_crf_training_data()
14 x_train_u, y_train_u = get_unary_data_for_page(annotation_filename, cnn=False)
15 x_train_p, _ = get_pairwise_data_for_page(annotation_filename)
---> 16 unary_potential_list = np.array(get_unary_potentials(x_train_u))
17 pairwise_potential_list = np.array(get_pairwise_potentials(x_train_p))
18
/tmp/ipykernel_23/3666745350.py in get_unary_potentials(x)
2 unary = tf.keras.models.load_model('./unary/')
3 x = np.expand_dims(x,axis = -1)
----> 4 return unary.predict(x)
How can I resolve this dimension problem?
To reproduce
I am running the MAML (with higher) meta-learning algorithm with a ResNet. I see this gives issues in my script (error message pasted below).
Is Adafactor not supposed to work with ResNets or other models?
Steps to reproduce the behavior:
run this code: https://github.com/brando90/higher/blob/master/examples/maml-omniglot.py (it already has adafactor)
if that works uncomment the resnet12 line and ping me please
Expected behavior
I expect training to go smoothly, but instead I get:
--------------------- META-TRAIN ------------------------
Starting training!
Traceback (most recent call last):
File "/home/miranda9/automl-meta-learning/automl-proj-src/experiments/meta_learning/main_metalearning.py", line 441, in <module>
main_resume_from_checkpoint(args)
File "/home/miranda9/automl-meta-learning/automl-proj-src/experiments/meta_learning/main_metalearning.py", line 403, in main_resume_from_checkpoint
run_training(args)
File "/home/miranda9/automl-meta-learning/automl-proj-src/experiments/meta_learning/main_metalearning.py", line 413, in run_training
meta_train_fixed_iterations(args)
File "/home/miranda9/automl-meta-learning/automl-proj-src/meta_learning/training/meta_training.py", line 233, in meta_train_fixed_iterations
args.outer_opt.step()
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/torch/optim/optimizer.py", line 88, in wrapper
return func(*args, **kwargs)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/transformers/optimization.py", line 577, in step
update = self._approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/transformers/optimization.py", line 508, in _approx_sq_grad
return torch.mm(r_factor.unsqueeze(-1), c_factor.unsqueeze(0))
RuntimeError: mat1 must be a matrix, got 4-D tensor
full error output:
('PID', '25721')
('always_use_deterministic_algorithms', False)
('args_hardcoded_in_script', False)
('base_model_mode', 'resnet12_rsf')
('best_val_loss', inf)
('condor_jobid', -1)
('copy_initial_weights', False)
('current_logs_path', '/home/miranda9/data/logs/logs_Nov05_15-44-03_jobid_668')
('current_time', 'Nov30_08-42-53')
('data_path', 'miniimagenet')
('debug', False)
('debug_test', False)
('device', device(type='cuda'))
('epoch_num', -1)
('eval_iters', 2)
('experiment_name', 'debug')
('fo', False)
('force_log', True)
('githash', '9af491c')
('githash_long', '9af491ccd13fa88f4d07287f54305488ba4967fc')
('githash_short', '9af491c')
('gpu_name', 'NVIDIA GeForce GTX TITAN X')
('grad_clip_mode', None)
('grad_clip_rate', None)
('hostname', 'vision-02.cs.illinois.edu')
('inner_debug_eval', False)
('inner_debug_train', False)
('inner_lr', 0.1)
('it', 0)
('jobid', 10340)
('k_eval', 15)
('k_shots', 5)
('log_root', PosixPath('/home/miranda9/data/logs/logs_Nov30_08-42-53_jobid_10340'))
('log_to_wandb', True)
('log_train_freq', 200)
('log_val_freq', 200)
('logger', <uutils.logger.Logger object at 0x2b832f5eff70>)
('logging', True)
('mail_user', 'brando.science#gmail.com')
('master_port', '37126')
('meta_batch_size_eval', 2)
('meta_batch_size_train', 2)
('meta_learner', 'maml_fixed_inner_lr')
('metrics_as_dist', False)
('my_stdout_filepath', '/home/miranda9/data/logs/logs_Nov05_15-44-03_jobid_668/my_stdout.log')
('n_classes', 5)
('nb_inner_train_steps', 4)
('nccl', 2708)
('num_epochs', -1)
('num_its', 3)
('num_workers', 4)
('outer_debug', False)
('outer_lr', 0.001)
('path_to_checkpoint', PosixPath('/home/miranda9/data_folder_fall2020_spring2021/logs/nov_all_mini_imagenet_expts/logs_Nov05_15-44-03_jobid_668'))
('pin_memory', False)
('pw_path', '/home/miranda9/pw_app.config.json')
('rank', -1)
('run_name', 'debug (Adafactor) : args.jobid=10340')
('save_ckpt', True)
('seed', None)
('serial', False)
('show_layerwise_sims', False)
('sim_compute_parallel', False)
('slurm_array_task_id', -1)
('slurm_jobid', 10340)
('split', 'train')
('tb', True)
('track_higher_grads', True)
('train_iters', 500000)
('trainin_with_epochs', False)
('training_mode', 'iterations')
('wandb_entity', 'brando')
('wandb_group', 'experiment_debug')
('wandb_project', 'sl_vs_ml_iclr_workshop_paper')
------- Main Resume from Checkpoint --------
args.base_model=ResNet(
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.1)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(downsample): Sequential(
(0): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(DropBlock): DropBlock()
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.1)
(conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(downsample): Sequential(
(0): Conv2d(64, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(DropBlock): DropBlock()
)
)
(layer3): Sequential(
(0): BasicBlock(
(conv1): Conv2d(160, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.1)
(conv2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(downsample): Sequential(
(0): Conv2d(160, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(DropBlock): DropBlock()
)
)
(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(320, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.1)
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn3): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(downsample): Sequential(
(0): Conv2d(320, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(DropBlock): DropBlock()
)
)
(avgpool): AdaptiveAvgPool2d(output_size=1)
(dropout): Dropout(p=0.0, inplace=False)
(classifier): Linear(in_features=640, out_features=5, bias=True)
)
args.outer_opt=Adafactor (
Parameter Group 0
beta1: None
clip_threshold: 1.0
decay_rate: -0.8
eps: (1e-30, 0.001)
lr: None
relative_step: True
scale_parameter: True
warmup_init: True
weight_decay: 0.0
)
args.meta_learner=MAMLMetaLearner(
(base_model): ResNet(
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.1)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(downsample): Sequential(
(0): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(DropBlock): DropBlock()
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.1)
(conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(downsample): Sequential(
(0): Conv2d(64, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(DropBlock): DropBlock()
)
)
(layer3): Sequential(
(0): BasicBlock(
(conv1): Conv2d(160, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.1)
(conv2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(downsample): Sequential(
(0): Conv2d(160, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(DropBlock): DropBlock()
)
)
(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(320, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.1)
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn3): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(downsample): Sequential(
(0): Conv2d(320, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(DropBlock): DropBlock()
)
)
(avgpool): AdaptiveAvgPool2d(output_size=1)
(dropout): Dropout(p=0.0, inplace=False)
(classifier): Linear(in_features=640, out_features=5, bias=True)
)
)
args.scheduler=None
--------------------- META-TRAIN ------------------------
Starting training!
Traceback (most recent call last):
File "/home/miranda9/automl-meta-learning/automl-proj-src/experiments/meta_learning/main_metalearning.py", line 441, in <module>
main_resume_from_checkpoint(args)
File "/home/miranda9/automl-meta-learning/automl-proj-src/experiments/meta_learning/main_metalearning.py", line 403, in main_resume_from_checkpoint
run_training(args)
File "/home/miranda9/automl-meta-learning/automl-proj-src/experiments/meta_learning/main_metalearning.py", line 413, in run_training
meta_train_fixed_iterations(args)
File "/home/miranda9/automl-meta-learning/automl-proj-src/meta_learning/training/meta_training.py", line 233, in meta_train_fixed_iterations
args.outer_opt.step()
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/torch/optim/optimizer.py", line 88, in wrapper
return func(*args, **kwargs)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/transformers/optimization.py", line 577, in step
update = self._approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/transformers/optimization.py", line 508, in _approx_sq_grad
return torch.mm(r_factor.unsqueeze(-1), c_factor.unsqueeze(0))
RuntimeError: mat1 must be a matrix, got 4-D tensor
related:
https://github.com/huggingface/transformers/issues/14574
https://github.com/facebookresearch/higher/issues/124
Does Adafactor from Hugging Face Transformers only work with Transformers - does it not work with ResNets and MAML with higher?
https://www.reddit.com/r/pytorch/comments/r5p2pk/adafactor_from_transformers_hugging_face_only/
I have recently been implementing a model based on OpenPose. OpenPose uses VGG as its backbone model to extract feature maps, but VGG contains max-pooling layers, and these reduce the spatial size of the output to 1/4. Here is the model structure of OpenPose:
VGGOpenPose(
(model0): OpenPose_Feature(
(model): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): ReLU(inplace=True)
(18): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(19): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(24): ReLU(inplace=True)
(25): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(26): ReLU(inplace=True)
)
)
(model1_1): Sequential(
(0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): ReLU(inplace=True)
(6): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(512, 38, kernel_size=(1, 1), stride=(1, 1))
)
(model2_1): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 38, kernel_size=(1, 1), stride=(1, 1))
)
(model3_1): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 38, kernel_size=(1, 1), stride=(1, 1))
)
(model4_1): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 38, kernel_size=(1, 1), stride=(1, 1))
)
(model5_1): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 38, kernel_size=(1, 1), stride=(1, 1))
)
(model6_1): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 38, kernel_size=(1, 1), stride=(1, 1))
)
(model1_2): Sequential(
(0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): ReLU(inplace=True)
(6): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(512, 19, kernel_size=(1, 1), stride=(1, 1))
)
(model2_2): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 19, kernel_size=(1, 1), stride=(1, 1))
)
(model3_2): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 19, kernel_size=(1, 1), stride=(1, 1))
)
(model4_2): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 19, kernel_size=(1, 1), stride=(1, 1))
)
(model5_2): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 19, kernel_size=(1, 1), stride=(1, 1))
)
(model6_2): Sequential(
(0): Conv2d(185, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(1): ReLU(inplace=True)
(2): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(3): ReLU(inplace=True)
(4): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(5): ReLU(inplace=True)
(6): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(7): ReLU(inplace=True)
(8): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(128, 19, kernel_size=(1, 1), stride=(1, 1))
)
)
The original paper says that the ground-truth heatmaps and PAFs have the same width and height as the input image.
OpenPose: Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields
I have also looked at a few implementations of OpenPose in Python. Most of them use an element-wise loss function to calculate the loss between the output and the ground-truth label, the same as the function given in the paper:
loss function in openpose
I was wondering whether the output of OpenPose has a different size from the input image and, if it does, how OpenPose calculates the loss between the output and the ground-truth heatmaps/PAFs.
I trained and tested an image classifier (Resnet34, Fast.ai, 3 classes) using pytorch and learn.predict() works as expected. When I convert pytorch -> onnx -> mlmodel it predicts the same softmax values regardless of the image I submit.
Here's my pytorch model:
Sequential(
(0): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(5): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(6): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(4): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(5): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(7): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
(1): Sequential(
(0): AdaptiveConcatPool2d(
(ap): AdaptiveAvgPool2d(output_size=1)
(mp): AdaptiveMaxPool2d(output_size=1)
)
(1): Flatten()
(2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.25, inplace=False)
(4): Linear(in_features=1024, out_features=512, bias=True)
(5): ReLU(inplace=True)
(6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): Dropout(p=0.5, inplace=False)
(8): Linear(in_features=512, out_features=3, bias=True)
)
)
To convert it to .onnx, I need to first normalize the image data and flatten it. I found this tutorial, which worked on a previous version of fastai/onnx-coreml. I do this with the following class:
# Default spatial size of the images fed to the exported model; used as the
# last two dimensions of the scaling tensor built by ImageScale.
sz = (960, 540)


class ImageScale(nn.Module):
    """Divide an image tensor by 255 and prepend a batch dimension.

    The divisor is a constant tensor of shape ``(3, size[0], size[1])`` filled
    with ``255.0``, so the input must broadcast against that shape
    (presumably a single 3-channel image with 8-bit pixel values; confirm
    against the caller).

    Args:
        size: Two-element sequence giving the last two dimensions of the
            divisor tensor. Defaults to the module-level ``sz``.
        device: Device on which to create the divisor. When ``None``, CUDA is
            used if available, otherwise CPU, so the module can also be
            constructed on machines without a GPU.
    """

    def __init__(self, size=sz, device=None):
        super().__init__()
        if device is None:
            # Keep the original CUDA placement when a GPU is present, but do
            # not crash at construction time on CPU-only hosts.
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Registered as a buffer so that .to()/.cuda()/.cpu() move the divisor
        # together with the rest of the module. persistent=False keeps the
        # state_dict keys identical to the plain-attribute version.
        self.register_buffer(
            "denominator",
            torch.full((3, size[0], size[1]), 255.0, device=device),
            persistent=False,
        )

    def forward(self, x):
        """Return ``x / 255`` with a new leading dimension of size 1."""
        return torch.div(x, self.denominator).unsqueeze(0)
To construct the entire model, I concatenate my ImageScale layer, the model, and a softmax function like this:
# Export pipeline, applied in order:
#   1. ImageScale     - divides the input by 255 and adds a batch axis
#   2. learn.model    - the trained network
#   3. Softmax(dim=-1) - normalises the outputs over the last axis
final_model = nn.Sequential(
    ImageScale(),
    learn.model,
    nn.Softmax(dim=-1),
)
Which ends up looking like this:
Sequential(
(0): ImageScale()
(1): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(5): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(6): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(4): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(5): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(7): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
(2): Sequential(
(0): AdaptiveConcatPool2d(
(ap): AdaptiveAvgPool2d(output_size=1)
(mp): AdaptiveMaxPool2d(output_size=1)
)
(1): Flatten()
(2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.25, inplace=False)
(4): Linear(in_features=1024, out_features=512, bias=True)
(5): ReLU(inplace=True)
(6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): Dropout(p=0.5, inplace=False)
(8): Linear(in_features=512, out_features=3, bias=True)
)
(3): Softmax(dim=-1)
)
I convert to .onnx like this:
# Random example input used to trace the model during export.  It has no
# batch axis: ImageScale.forward adds one via unsqueeze(0).
dummy_input = Variable(torch.randn(3, sz[0], sz[1])).cuda()

# Write the traced graph to 'model.onnx'; verbose=True prints the graph.
torch.onnx.export(
    final_model,
    dummy_input,
    'model.onnx',
    input_names=['input'],
    output_names=['output'],
    verbose=True,
)
And I convert from .onnx to .mlmodel like this:
# Read the exported ONNX file.  The context manager guarantees the file
# handle is closed even if reading fails (the original never closed it).
with open('model.onnx', 'rb') as model_file:
    onnx_bytes = model_file.read()

# Deserialize the raw bytes into an ONNX ModelProto message.
model_proto = onnx_pb.ModelProto()
model_proto.ParseFromString(onnx_bytes)

# Convert to a Core ML classifier whose class names come from labels.txt.
# NOTE(review): the ONNX graph was exported with input_names=['input'], but
# image_input_names here is ['image'] - confirm the names actually match.
coreml_model = convert(model_proto, image_input_names = ['image'], mode='classifier', class_labels="labels.txt")
coreml_model.save('model.mlmodel')
When I call predict using coremltools, I get the same output regardless of the image I input:
import coremltools
from PIL import Image
# Load the converted Core ML model from disk.
model = coremltools.models.MLModel('model.mlmodel')
# First image: run a prediction and record the output.
img = Image.open('img.jpg')
preds = model.predict({'image': img})
# preds: {'output': {'class1': 0.011085365898907185, 'class2': 0.9794686436653137, 'class2': 0.009446004405617714}, 'classLabel': 'class2'}
# Second, different image file: the recorded output is identical to the first.
img2 = Image.open('img2.jpg')
preds = model.predict({'image': img2})
# preds: {'output': {'class1': 0.011085365898907185, 'class2': 0.9794686436653137, 'class2': 0.009446004405617714}, 'classLabel': 'class2'}
Possible issues:
1. I'm not setting up the Sequential correctly before converting
2. Coreml or onnx cannot handle non-square images
I've tried a bunch of different inputs, but I keep getting the same output, so any help would be much appreciated!
Here are screen shots of my head and tail from netron:
Head:
Tail:
You need to call final_model.eval() before exporting to ONNX; otherwise the model is in the default train mode and all dropout layers will be enabled.
I added target_ios=13 to my list of parameters (which required updating to MacOS Version 10.15) and it worked.
from onnx_coreml import convert
# Convert directly from the ONNX file path, with target_ios set to '13'.
ml_model = convert(model='model.onnx', target_ios='13')
I am training a DNN (CRNN) with PyTorch, but the loss value is behaving abnormally.
The program prints avg_loss every 20 batches and saves the model parameters every 100 batches. The initial loss is about 20-30. Some problems occurred in my program, so the training process was interrupted. After loading the parameters from the saved model, I continued training, but found that the loss still starts from 20-30. By the way, I have a dataset of about 10 million pictures and I have trained on about 3 million of them.
I want to figure out where the problem is: the PyTorch mechanism or bugs in my program.
Here are more details:
1. CRNN structure:
CRNN (
(cnn): Sequential (
(conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu0): ReLU (inplace)
(pooling0): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu1): ReLU (inplace)
(pooling1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
(conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(batchnorm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
(relu2): ReLU (inplace)
(conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu3): ReLU (inplace)
(pooling2): MaxPool2d (size=(2, 2), stride=(2, 1), dilation=(1, 1))
(conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(batchnorm4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
(relu4): ReLU (inplace)
(conv5): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu5): ReLU (inplace)
(pooling3): MaxPool2d (size=(2, 2), stride=(2, 1), dilation=(1, 1))
(conv6): Conv2d(512, 512, kernel_size=(2, 2), stride=(1, 1))
(batchnorm6): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
(relu6): ReLU (inplace)
)
(rnn): Sequential (
(0): BidirectionalLSTM (
(rnn): LSTM(512, 256, bidirectional=True)
(embedding): Linear (512 -> 256)
)
(1): BidirectionalLSTM (
(rnn): LSTM(256, 256, bidirectional=True)
(embedding): Linear (512 -> 5530)
)
)
)
2. model init and parameters loading.
def crnnSource():
    """Create the CRNN and its label converter, then restore saved weights.

    Returns:
        A ``(model, converter)`` tuple: the CRNN after loading the state
        dict stored at ``./models/crnn_OCR.pkl``, and a
        ``util.strLabelConverter`` built from ``keys.alphabet``.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)

    # The third argument is one more than the alphabet size
    # (presumably the extra class is the CTC blank - TODO confirm).
    # The original author was unsure whether the trailing 1 is needed.
    num_classes = len(charset) + 1
    net = crnn.CRNN(32, 1, num_classes, 256, 1)

    # weights_init is applied to every submodule first; the checkpoint
    # is loaded into the model afterwards.
    net.apply(weights_init)
    checkpoint = torch.load('./models/crnn_OCR.pkl')
    net.load_state_dict(checkpoint)

    return net, label_converter
3. training code
# Train `net` on `trainset` with CTC loss and RMSprop, printing progress and
# saving net.state_dict() to './models/crnn_OCR.pkl'; returns `net`.
#
# Parameters:
#   net       - the model; called as net(image), and its parameters are optimised.
#   trainset  - iterable yielding (img, txt) pairs.
#   converter - object whose encode(txt) returns a (targets, lengths) pair.
#
# NOTE(review): leading indentation was lost in this paste, so the nesting of
# the statements below (in particular the save-counter check and the final
# torch.save) cannot be determined from here - confirm against the real file.
# NOTE(review): only net.state_dict() is ever saved, and the optimizer is
# created fresh on every call, so RMSprop state is not carried across a
# restart.
def trainProc(net ,trainset, converter):
print ("--------------------------------")
print ("Start to Train.")
# Loss function, running-average tracker and optimizer.
criterion = CTCLoss().cuda()
loss_avg = util.averager()
optimizer = optim.RMSprop(net.parameters(), lr = 0.001)
# Pre-allocated buffers that util.loadData fills for every batch.
# NOTE(review): this buffer has 3 channels, while the printed CRNN shows
# conv0 = Conv2d(1, 64, ...); presumably util.loadData resizes the buffer to
# match `img` - TODO confirm.
image = torch.FloatTensor(BATCH_SIZE, 3, 32, 100) #opt.imgH
text = torch.IntTensor(BATCH_SIZE * 5)
length = torch.IntTensor(BATCH_SIZE)
# Only the image buffer is moved to the GPU; text and length stay on the CPU.
image = image.cuda()
image = Variable(image)
text = Variable(text)
length = Variable(length)
# Counter used to decide when to write a checkpoint.
sav_inv = 0
for epoch in range(TRAIN_EPOCHS):
# The counter and the timer are reset at the start of every epoch.
sav_inv = 0
timer = time.time()
for i,data in enumerate(trainset, 0):
img, txt = data
img = ConvtFileToTensor(img)
# The actual batch size is read from the converted tensor's first dimension.
batch_size = img.size(0)
util.loadData(image, img)
# Encode the text labels and load them into the target and length buffers.
t, l = converter.encode(txt)
util.loadData(text,t)
util.loadData(length,l)
preds = net(image)
# One entry per sample, each equal to preds.size(0)
# (presumably the output sequence length - TODO confirm).
preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
# The loss is divided by the batch size.
cost = criterion(preds, text, preds_size, length) / batch_size
# Clear old gradients, backpropagate, then apply the RMSprop update.
net.zero_grad()
cost.backward()
optimizer.step()
loss_avg.add(cost)
#running_loss += loss.data[0]
# True on every 20th batch (i = 19, 39, ...): print the averaged loss and the
# elapsed time, then reset the averager and the timer.
if i % 20 == 19:
time2 = time.time()
print ("[%d, %5d] loss: %.6f TIME: %.6f" %(epoch+1, i+1, loss_avg.val(),time2 - timer))
print (cost)
loss_avg.reset()
timer = time.time()
# Save a checkpoint when the counter reaches SAV_INV-1 and reset it;
# otherwise increment the counter.
if sav_inv == SAV_INV-1:
torch.save(net.state_dict(),'./models/crnn_OCR.pkl')
sav_inv = 0
else:
sav_inv += 1
# Unconditional save to the same checkpoint path.
torch.save(net.state_dict(),'./models/crnn_OCR.pkl')
print ("Finished Training.")
return net