I want to disable the backward computation in certain convolution layers in Caffe. How do I do this?
I have tried the propagate_down setting; however, it seems to work for fully connected layers but not for convolution layers. Please help.
First update: I set propagate_down: false in the test/pool_proj layer. I don't want it to backpropagate (while the other layers still do), but the log says that the layer still needs backward computation.
Second update: Consider a model with two paths from the input layer to the output layer, p1: A->B->C->D and p2: A->B->C1->D, where A is the input layer, D is a fully connected layer, and the rest are convolution layers. When gradients flow backward from D to the earlier layers, p1 should behave exactly like the normal backward pass, but p2 should stop at C1: the weights of C1 are still updated, it just doesn't propagate its error to the layers before it.
The prototxt:
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 224
mean_value: 104
mean_value: 117
mean_value: 123
}
data_param {
source: "/media/eric/main/data/ImageNet/ilsvrc12_train_lmdb"
batch_size: 32
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
crop_size: 224
mean_value: 104
mean_value: 117
mean_value: 123
}
data_param {
source: "/media/eric/main/data/ImageNet/ilsvrc12_val_lmdb"
batch_size: 50
backend: LMDB
}
}
layer {
name: "conv1/7x7_s2"
type: "Convolution"
bottom: "data"
top: "conv1/7x7_s2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 3
kernel_size: 7
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
layer {
name: "conv1/relu_7x7"
type: "ReLU"
bottom: "conv1/7x7_s2"
top: "conv1/7x7_s2"
}
layer {
name: "pool1/3x3_s2"
type: "Pooling"
bottom: "conv1/7x7_s2"
top: "pool1/3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "pool1/norm1"
type: "LRN"
bottom: "pool1/3x3_s2"
top: "pool1/norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv2/3x3_reduce"
type: "Convolution"
bottom: "pool1/norm1"
top: "conv2/3x3_reduce"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
layer {
name: "conv2/relu_3x3_reduce"
type: "ReLU"
bottom: "conv2/3x3_reduce"
top: "conv2/3x3_reduce"
}
layer {
name: "conv2/3x3"
type: "Convolution"
bottom: "conv2/3x3_reduce"
top: "conv2/3x3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 192
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
layer {
name: "conv2/relu_3x3"
type: "ReLU"
bottom: "conv2/3x3"
top: "conv2/3x3"
}
layer {
name: "conv2/norm2"
type: "LRN"
bottom: "conv2/3x3"
top: "conv2/norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool2/3x3_s2"
type: "Pooling"
bottom: "conv2/norm2"
top: "pool2/3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "test/5x5_reduce"
type: "Convolution"
bottom: "pool2/3x3_s2"
top: "test/5x5_reduce"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
kernel_size: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
layer {
name: "test/relu_5x5_reduce"
type: "ReLU"
bottom: "test/5x5_reduce"
top: "test/5x5_reduce"
}
layer {
name: "test/5x5"
type: "Convolution"
bottom: "test/5x5_reduce"
top: "test/5x5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
pad: 2
kernel_size: 5
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
layer {
name: "test/relu_5x5"
type: "ReLU"
bottom: "test/5x5"
top: "test/5x5"
}
layer {
name: "test/pool"
type: "Pooling"
bottom: "pool2/3x3_s2"
top: "test/pool"
pooling_param {
pool: MAX
kernel_size: 3
stride: 1
pad: 1
}
}
layer {
name: "test/pool_proj"
type: "Convolution"
bottom: "test/pool"
top: "test/pool_proj"
propagate_down: false
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
layer {
name: "test/relu_pool_proj"
type: "ReLU"
bottom: "test/pool_proj"
top: "test/pool_proj"
}
layer {
name: "test/output"
type: "Concat"
bottom: "test/5x5"
bottom: "test/pool_proj"
top: "test/output"
}
layer {
name: "test_output/pool"
type: "Pooling"
bottom: "test/output"
top: "test/output"
pooling_param {
pool: MAX
kernel_size: 28
}
}
layer {
name: "classifier"
type: "InnerProduct"
bottom: "test/output"
top: "classifier"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1000
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss3"
type: "SoftmaxWithLoss"
bottom: "classifier"
bottom: "label"
top: "loss3"
loss_weight: 1
}
layer {
name: "top-1"
type: "Accuracy"
bottom: "classifier"
bottom: "label"
top: "top-1"
include {
phase: TEST
}
}
layer {
name: "top-5"
type: "Accuracy"
bottom: "classifier"
bottom: "label"
top: "top-5"
include {
phase: TEST
}
accuracy_param {
top_k: 5
}
}
The log:
I1116 15:44:04.405261 19358 net.cpp:226] loss3 needs backward computation.
I1116 15:44:04.405283 19358 net.cpp:226] classifier needs backward computation.
I1116 15:44:04.405302 19358 net.cpp:226] test_output/pool needs backward computation.
I1116 15:44:04.405320 19358 net.cpp:226] test/output needs backward computation.
I1116 15:44:04.405339 19358 net.cpp:226] test/relu_pool_proj needs backward computation.
I1116 15:44:04.405357 19358 net.cpp:226] test/pool_proj needs backward computation.
I1116 15:44:04.405375 19358 net.cpp:228] test/pool does not need backward computation.
I1116 15:44:04.405395 19358 net.cpp:226] test/relu_5x5 needs backward computation.
I1116 15:44:04.405412 19358 net.cpp:226] test/5x5 needs backward computation.
I1116 15:44:04.405431 19358 net.cpp:226] test/relu_5x5_reduce needs backward computation.
I1116 15:44:04.405448 19358 net.cpp:226] test/5x5_reduce needs backward computation.
I1116 15:44:04.405468 19358 net.cpp:226] pool2/3x3_s2_pool2/3x3_s2_0_split needs backward computation.
I1116 15:44:04.405485 19358 net.cpp:226] pool2/3x3_s2 needs backward computation.
I1116 15:44:04.405505 19358 net.cpp:226] conv2/norm2 needs backward computation.
I1116 15:44:04.405522 19358 net.cpp:226] conv2/relu_3x3 needs backward computation.
I1116 15:44:04.405542 19358 net.cpp:226] conv2/3x3 needs backward computation.
I1116 15:44:04.405560 19358 net.cpp:226] conv2/relu_3x3_reduce needs backward computation.
I1116 15:44:04.405578 19358 net.cpp:226] conv2/3x3_reduce needs backward computation.
I1116 15:44:04.405596 19358 net.cpp:226] pool1/norm1 needs backward computation.
I1116 15:44:04.405616 19358 net.cpp:226] pool1/3x3_s2 needs backward computation.
I1116 15:44:04.405632 19358 net.cpp:226] conv1/relu_7x7 needs backward computation.
I1116 15:44:04.405652 19358 net.cpp:226] conv1/7x7_s2 needs backward computation.
I1116 15:44:04.405670 19358 net.cpp:228] data does not need backward computation.
I1116 15:44:04.405705 19358 net.cpp:270] This network produces output loss3
I1116 15:44:04.405745 19358 net.cpp:283] Network initialization done.
From Evan Shelhamer (https://groups.google.com/forum/#!topic/caffe-users/54Z-B-CXmLE):
propagate_down is intended to switch off backprop along certain paths from the loss while not entirely turning off layers earlier in the graph. If gradients propagate to a layer by another path, or regularization such as weight decay is not disabled, the parameters of these layers will still be updated. I suspect decay is still on for these layers, so you could set decay_mult: 0 for the weights and biases.
Setting lr_mult: 0 on the other hand fixes parameters and skips backprop where it is unnecessary.
You have decay_mult: 1 in some of the early layers, so the gradients are still calculated. Set lr_mult: 0 in all of the layers whose weights you don't want updated.
For example, change the following:
layer {
name: "conv1/7x7_s2"
type: "Convolution"
bottom: "data"
top: "conv1/7x7_s2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 3
kernel_size: 7
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
to
layer {
name: "conv1/7x7_s2"
type: "Convolution"
bottom: "data"
top: "conv1/7x7_s2"
param {
lr_mult: 0
decay_mult: 1
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 3
kernel_size: 7
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
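Note that propagate_down only cuts the path below a layer; it does not turn the layer itself off. It is given once per bottom blob and blocks the gradient with respect to that bottom, while the layer's own parameter gradients are still computed, which is why the log still lists test/pool_proj as needing backward computation. So if the goal of the second update is to keep that layer learning while stopping the error from reaching the layers below it, propagate_down is the right knob; a sketch of only that layer, reflecting my reading of the semantics:
layer {
name: "test/pool_proj"
type: "Convolution"
bottom: "test/pool"
top: "test/pool_proj"
propagate_down: false  # one entry per bottom: no gradient flows back to "test/pool"
param { lr_mult: 1 decay_mult: 1 }  # filter weights still learn
param { lr_mult: 2 decay_mult: 0 }  # biases still learn
convolution_param {
num_output: 32
kernel_size: 1
weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0.2 }
}
}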
Also for reference:
https://github.com/BVLC/caffe/issues/4984
Related
I'm always getting the same accuracy, and when I run classification it always predicts the same single label. I went through many articles and everyone recommends shuffling the data; I did that with random.shuffle and also tried the convert_imageset script, but it did not help. Please find my solver.prototxt and caffenet_train.prototxt below. I have 1000 images in my dataset: 833 in train_lmdb and the rest in validation_lmdb.
Training logs:
I1112 22:41:26.373661 10633 solver.cpp:347] Iteration 1184, Testing net (#0)
I1112 22:41:26.828955 10633 solver.cpp:414] Test net output #0: accuracy = 1
I1112 22:41:26.829105 10633 solver.cpp:414] Test net output #1: loss = 4.05117e-05 (* 1 = 4.05117e-05 loss)
I1112 22:41:26.952340 10656 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:28.697041 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:30.889508 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:32.288192 10633 solver.cpp:347] Iteration 1200, Testing net (#0)
I1112 22:41:32.716845 10633 solver.cpp:414] Test net output #0: accuracy = 1
I1112 22:41:32.716941 10633 solver.cpp:414] Test net output #1: loss = 4.08e-05 (* 1 = 4.08e-05 loss)
I1112 22:41:32.861697 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:33.050954 10633 solver.cpp:239] Iteration 1200 (2.6885 iter/s, 18.5978s/50 iters), loss = 0.000119432
I1112 22:41:33.051054 10633 solver.cpp:258] Train net output #0: loss = 0.000119432 (* 1 = 0.000119432 loss)
I1112 22:41:33.051067 10633 sgd_solver.cpp:112] Iteration 1200, lr = 1e-15
I1112 22:41:35.700759 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:37.869782 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:38.169018 10633 solver.cpp:347] Iteration 1216, Testing net (#0)
I1112 22:41:38.396162 10656 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:38.613301 10633 solver.cpp:414] Test net output #0: accuracy = 1
I1112 22:41:38.613348 10633 solver.cpp:414] Test net output #1: loss = 4.09327e-05 (* 1 = 4.09327e-05 loss)
solver.prototxt:
net: "caffenet_train.prototxt"
test_iter: 16
test_interval: 16
base_lr: 0.001
lr_policy: "step"
gamma: 0.1
stepsize: 100
display: 50
max_iter: 2000
momentum: 0.9
weight_decay: 0.0005
snapshot: 500
snapshot_prefix: "output/caffe_model"
solver_mode: GPU
caffenet_train.prototxt:
name: "CaffeNet"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 227
mean_file: "output/mean.binaryproto"
}
data_param {
source: "output/train_lmdb"
batch_size: 128
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
crop_size: 227
mean_file: "output/mean.binaryproto"
}
data_param {
source: "output/validation_lmdb"
batch_size: 10
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm1"
type: "LRN"
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "norm1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm2"
type: "LRN"
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "norm2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc8"
type: "InnerProduct"
bottom: "fc7"
top: "fc8"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc8"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8"
bottom: "label"
top: "loss"
}
Try fine-tuning from CaffeNet's original caffemodel; that should solve it.
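Concretely, that means initializing training from the reference weights instead of from scratch, for example (the file names are placeholders for your own solver and the downloaded caffemodel):
caffe train --solver solver.prototxt --weights bvlc_reference_caffenet.caffemodel
Caffe copies weights between nets by layer name, so a final layer whose num_output differs from the reference model (here fc8 with num_output: 2 instead of 1000) is usually renamed so that its weights are freshly initialized rather than copied.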
The initial loss computed when my model is run on the image dataset stays the same every time I run Caffe. Isn't that strange, since the initial loss should differ (at least slightly) between runs?
The loss also stays constant across further iterations when I use SoftmaxWithLoss as the loss layer. Likewise, with lr_mult: 0 in the deconvolution layer the loss appears constant across iterations; with lr_mult: 1 the loss value changes, but it eventually returns to the initial value after a few thousand iterations.
Any ideas? Any help is highly appreciated.
The following is my solver file:
test_iter: 100
test_interval: 100 # py solving tests
display: 100
#average_loss: 100
lr_policy: "fixed"
base_lr: 0.0000001
momentum: 0.5
iter_size: 1
# base_lr: 1e-9
# momentum: 0.99
# iter_size: 1
max_iter: 1000000
#weight_decay: 0.0005
snapshot: 1000
snapshot_diff: true
#test_initialization: false
solver_mode: GPU
The following is the architecture of my convolutional neural network.
# Input 128 640 3
# Conv1 (kernel=3) 126 638 64
# ReLU
# Conv2 (kernel=3) 124 636 128
# ReLU
# Conv3 (kernel=3) 122 634 256
# ReLU
# Pool1 (kernel=2) 61 317 256
# Conv4 (kernel=4) 58 314 512
# ReLU
# Conv5 (kernel=4) 55 311 1024
# ReLU
# Conv6 (kernel=4) 52 308 512
# ReLU
# Pool2 (kernel=2) 26 154 512
# Conv7 (kernel=4,stride=2,pad=3) 15 79 5
# ReLU
# Decon1 (kernel=16,stride=8,pad=3) 128 640 5
# ReLU
# Loss
name: "Conv-Deconv-Net"
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TRAIN
}
data_param {
source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_IMG_LMDB_olpywm"
batch_size: 4
backend: LMDB
}
}
layer {
name: "label"
type: "Data"
top: "label"
include {
phase: TRAIN
}
data_param {
source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_LBL_LMDB_olpywm"
batch_size: 4
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TEST
}
data_param {
source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_IMG_LMDB_olpywm"
batch_size: 4
backend: LMDB
}
}
layer {
name: "label"
type: "Data"
top: "label"
include {
phase: TEST
}
data_param {
source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_LBL_LMDB_olpywm"
batch_size: 4
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
pad: 0
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "relu1"
}
layer {
name: "conv2"
type: "Convolution"
bottom: "relu1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "relu2"
}
layer {
name: "conv3"
type: "Convolution"
bottom: "relu2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "relu3"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "relu3"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4"
type: "Convolution"
bottom: "pool1"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 4
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "relu4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "relu4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 4
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "relu5"
}
layer {
name: "conv6"
type: "Convolution"
bottom: "relu5"
top: "conv6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 4
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "relu6"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "relu6"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv7"
type: "Convolution"
bottom: "pool2"
top: "conv7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 5
pad: 3
kernel_size: 4
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "conv7"
top: "relu7"
}
layer {
name: "deconv1"
type: "Deconvolution"
bottom: "relu7"
top: "deconv1"
param {
lr_mult: 1
}
convolution_param {
num_output: 5
bias_term: false
kernel_size: 16
stride: 8
pad: 0
weight_filler {
type: "bilinear"
}
}
}
layer {
name: "relu8"
type: "ReLU"
bottom: "deconv1"
top: "relu8"
}
#layer {
# name: "crop"
# type: "Crop"
# bottom: "deconv3"
# bottom: "data"
# top: "score"
#}
layer {
name: "prob"
type: "SoftmaxWithLoss"
bottom: "relu8"
bottom: "label"
top: "loss"
loss_param {
# ignore_label: 255
# normalize: true
normalize: false
}
}
It seems your base_lr is too small. Start with 0.00001 and keep your momentum at 0.9. If the learning rate is too small, convergence will be very slow; if it is too high, gradient descent will overshoot the local minima (that is when you see the loss shoot up). You have to arrive at a good value iteratively; there is no magic number for this.
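In the solver above, that would mean changing just these lines (the values are a starting point to tune from, not a final setting):
base_lr: 0.00001  # was 0.0000001
momentum: 0.9     # was 0.5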
I'm working with the Caffe framework and I would like to train the following network.
When I execute this command:
caffe train --solver solver.prototxt
it throws the following error:
F0802 14:31:54.506695 28038 insert_splits.cpp:29] Unknown bottom blob 'image' (layer 'conv1', bottom index 0)
*** Check failure stack trace: ***
# 0x7ff2941c3f9d google::LogMessage::Fail()
# 0x7ff2941c5e03 google::LogMessage::SendToLog()
# 0x7ff2941c3b2b google::LogMessage::Flush()
# 0x7ff2941c67ee google::LogMessageFatal::~LogMessageFatal()
# 0x7ff2947cedbe caffe::InsertSplits()
# 0x7ff2948306de caffe::Net<>::Init()
# 0x7ff294833a81 caffe::Net<>::Net()
# 0x7ff29480ce6a caffe::Solver<>::InitTestNets()
# 0x7ff29480ee85 caffe::Solver<>::Init()
# 0x7ff29480f19a caffe::Solver<>::Solver()
# 0x7ff2947f4343 caffe::Creator_SGDSolver<>()
# 0x40b1a0 (unknown)
# 0x407373 (unknown)
# 0x7ff292e40741 __libc_start_main
# 0x407b79 (unknown)
Aborted (core dumped)
The code is (train2.prototxt):
name: "xxxxxx"
layer {
name: "image"
type: "HDF5Data"
top: "image"
top: "label"
hdf5_data_param {
source: "h5a.train.h5.txt"
batch_size: 64
}
include {
phase: TRAIN
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "image"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "norm1"
type: "LRN"
bottom: "conv1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "norm1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm2"
type: "LRN"
bottom: "pool1"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "norm2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv3"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "improd3"
type: "InnerProduct"
bottom: "pool2"
top: "improd3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1000
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "improd3"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "improd3"
bottom: "label"
top: "loss"
}
The solver.prototxt:
net: "train2.prototxt"
test_iter: 100
test_interval: 1000
# lr for fine-tuning should be lower than when starting from scratch
base_lr: 0.001
lr_policy: "step"
gamma: 0.1
# stepsize should also be lower, as we're closer to being done
stepsize: 20000
display: 20
max_iter: 100000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "caffe"
solver_mode: CPU
I'm stuck and I can't start training the network because of this problem.
This happens because, even though you are running the TRAIN phase, the TEST phase is also instantiated for validation. Since there is no input data layer for the TEST phase, the conv1 layer cannot find its input blob image. The TEST net is built because you have defined test_* parameters in the solver and phase: TEST in some layers of train2.prototxt. Removing those parameters from the solver, along with the TEST-phase layers, will let the training run without any issues.
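If you would rather keep validation, the alternative is to add TEST-phase data layers that produce the blobs the net expects. A minimal sketch, assuming a separate list of test HDF5 files (the file name h5a.test.h5.txt is a placeholder):
layer {
name: "image"
type: "HDF5Data"
top: "image"
top: "label"
hdf5_data_param {
source: "h5a.test.h5.txt"  # placeholder: text file listing the test .h5 files
batch_size: 64
}
include {
phase: TEST
}
}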
I have been following a Caffe example here to plot the convolution kernels from my ConvNet. I have attached an image of my kernels below; however, they look nothing like the kernels in the example. I followed the example exactly. Does anyone know what the issue might be?
My net is trained on a set of simulated images (with two classes), and its performance is pretty good, around 80% test accuracy.
layer {
name: "input"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mean_file: "/tmp/stage5/mean/mean.binaryproto"
}
data_param {
source: "/tmp/stage5/train/train-lmdb"
batch_size: 100
backend: LMDB
}
}
layer {
name: "input"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mean_file: "/tmp/stage5/mean/mean.binaryproto"
}
data_param {
source: "/tmp/stage5/validation/validation-lmdb"
batch_size: 10
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 40
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool1"
top: "ip1"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip2"
bottom: "label"
top: "loss"
}
Well, you might need to set the interpolation parameter to 'none' when you call imshow. Is that what you are referring to?
To get "smoother" filters, you could try adding a small amount of L2 weight decay (decay_mult) to the conv1 layer.
See also http://caffe.berkeleyvision.org/tutorial/layers.html
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
# learning rate and decay multipliers for the filters
param { lr_mult: 1 decay_mult: 1 }
# learning rate and decay multipliers for the biases
param { lr_mult: 2 decay_mult: 0 }
convolution_param {
num_output: 96 # learn 96 filters
kernel_size: 11 # each filter is 11x11
stride: 4 # step 4 pixels between each filter application
weight_filler {
type: "gaussian" # initialize the filters from a Gaussian
std: 0.01 # distribution with stdev 0.01 (default mean: 0)
}
bias_filler {
type: "constant" # initialize the biases to zero (0)
value: 0
}
}
}
I had the Caffe C++ example program working on my computer, but after recently recompiling Caffe, I've encountered this error when I try to run the program:
[libprotobuf ERROR google/protobuf/text_format.cc:245] Error parsing
text-format caffe.NetParameter: 2:4: Message type "caffe.NetParameter"
has no field named "net".
upgrade_proto.cpp:928] Check failed: ReadProtoFromTextFile(param_file,
param) Failed to parse NetParameter file:
/home/jack/Desktop/beeshiny/deploy.prototxt
Am I missing something or has the syntax of the prototxt files been changed? My deploy.prototxt file (that I pass to the C++ program) looks like this:
# The train/test net protocol buffer definition
net: "/home/jack/Desktop/beeshiny/deploy_arch.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "lenet"
# solver mode: CPU or GPU
solver_mode: CPU
The contents of the deploy_arch.prototxt file referenced in the prototxt file above:
name: "LeNet"
input: "data"
input_shape {
dim: 10
dim: 1
dim: 24
dim: 24
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 50
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool2"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "loss"
type: "Softmax"
bottom: "ip2"
top: "loss"
}
I don't understand why this has suddenly stopped working, unless an update has made my prototxt file obsolete.
I solved my problem by adding caffe/python to my $PYTHONPATH.
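For reference, that just means putting the pycaffe directory on the Python path before running, for example (replace the path with your own Caffe checkout):
export PYTHONPATH=/path/to/caffe/python:$PYTHONPATH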