I keep getting the same accuracy: whenever I run classification, it always predicts a single label. I went through many articles, and everyone recommends shuffling the data. I did that with random.shuffle and also tried the convert_imageset script, but neither helped. Please find my solver.prototxt and caffenet_train.prototxt below. My dataset has 1000 images: 833 in train_lmdb and the rest in validation_lmdb.
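Here is roughly how the shuffle was done (a minimal sketch; the labels.txt name is just illustrative for a file of "path label" lines):

import random

# read (path, label) pairs, one "path label" per line
with open('labels.txt') as f:
    pairs = [line.split() for line in f if line.strip()]

random.shuffle(pairs)          # shuffle before writing the LMDBs

train_pairs = pairs[:833]      # -> train_lmdb
val_pairs = pairs[833:]        # -> validation_lmdb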
Training logs:
I1112 22:41:26.373661 10633 solver.cpp:347] Iteration 1184, Testing net (#0)
I1112 22:41:26.828955 10633 solver.cpp:414] Test net output #0: accuracy = 1
I1112 22:41:26.829105 10633 solver.cpp:414] Test net output #1: loss = 4.05117e-05 (* 1 = 4.05117e-05 loss)
I1112 22:41:26.952340 10656 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:28.697041 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:30.889508 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:32.288192 10633 solver.cpp:347] Iteration 1200, Testing net (#0)
I1112 22:41:32.716845 10633 solver.cpp:414] Test net output #0: accuracy = 1
I1112 22:41:32.716941 10633 solver.cpp:414] Test net output #1: loss = 4.08e-05 (* 1 = 4.08e-05 loss)
I1112 22:41:32.861697 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:33.050954 10633 solver.cpp:239] Iteration 1200 (2.6885 iter/s, 18.5978s/50 iters), loss = 0.000119432
I1112 22:41:33.051054 10633 solver.cpp:258] Train net output #0: loss = 0.000119432 (* 1 = 0.000119432 loss)
I1112 22:41:33.051067 10633 sgd_solver.cpp:112] Iteration 1200, lr = 1e-15
I1112 22:41:35.700759 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:37.869782 10655 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:38.169018 10633 solver.cpp:347] Iteration 1216, Testing net (#0)
I1112 22:41:38.396162 10656 data_layer.cpp:73] Restarting data prefetching from start.
I1112 22:41:38.613301 10633 solver.cpp:414] Test net output #0: accuracy = 1
I1112 22:41:38.613348 10633 solver.cpp:414] Test net output #1: loss = 4.09327e-05 (* 1 = 4.09327e-05 loss)
solver.prototxt:
net: "caffenet_train.prototxt"
test_iter: 16
test_interval: 16
base_lr: 0.001
lr_policy: "step"
gamma: 0.1
stepsize: 100
display: 50
max_iter: 2000
momentum: 0.9
weight_decay: 0.0005
snapshot: 500
snapshot_prefix: "output/caffe_model"
solver_mode: GPU
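Note that with lr_policy: "step", the effective learning rate is base_lr * gamma^floor(iter / stepsize). With stepsize: 100 and gamma: 0.1, by iteration 1200 that is 0.001 * 0.1^12 = 1e-15, which matches the lr = 1e-15 line in the log above.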
caffenet_train.prototxt:
name: "CaffeNet"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 227
mean_file: "output/mean.binaryproto"
}
data_param {
source: "output/train_lmdb"
batch_size: 128
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
crop_size: 227
mean_file: "output/mean.binaryproto"
}
data_param {
source: "output/validation_lmdb"
batch_size: 10
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm1"
type: "LRN"
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "norm1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm2"
type: "LRN"
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "norm2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc8"
type: "InnerProduct"
bottom: "fc7"
top: "fc8"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc8"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8"
bottom: "label"
top: "loss"
}
Try fine-tuning from CaffeNet's original caffemodel instead of training from scratch. Then it will be solved.
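Assuming the BVLC reference model has been downloaded (the file name below is illustrative), fine-tuning just means passing the pretrained weights to the solver:

caffe train --solver solver.prototxt --weights bvlc_reference_caffenet.caffemodel

Weights are copied by layer name, so the two-class fc8 above should be renamed (e.g. fc8_new) so that it is freshly initialized rather than clashing with the 1000-way original.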
I am using Caffe to train AlexNet:
name: "AlexNet"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mean_file: "data/cropCenter/cropCenter_mean.binaryproto"
}
data_param {
source: "examples/cropCenter/cropCenter_train_lmdb"
batch_size: 256
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mean_file: "data/cropCenter/cropCenter_mean.binaryproto"
}
data_param {
source: "examples/cropCenter/cropCenter_val_lmdb"
batch_size: 128
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "norm1"
type: "LRN"
bottom: "conv1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "norm1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "norm2"
type: "LRN"
bottom: "conv2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "norm2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc8"
type: "InnerProduct"
bottom: "fc7"
top: "fc8"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc8"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8"
bottom: "label"
top: "loss"
}
My data is small, around 3k training samples and a 0.7k validation set, but the images are large, around 3000x4000 pixels.
I cropped the center of each image to focus on my data, then augmented it (rotation, cropping, scaling, flipping, blurring, etc., a lot of functions), reaching 300k training samples and 70k validation samples.
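A minimal sketch of that kind of augmentation, assuming OpenCV (the exact transforms and parameters of the original pipeline are not shown, so these are only illustrative):

import cv2

def augment(img):
    h, w = img.shape[:2]
    rot = cv2.getRotationMatrix2D((w / 2, h / 2), 15, 1.0)
    return [
        cv2.flip(img, 1),                                         # horizontal flip
        cv2.GaussianBlur(img, (5, 5), 0),                         # blur
        cv2.warpAffine(img, rot, (w, h)),                         # 15-degree rotation
        cv2.resize(img[h // 8:-h // 8, w // 8:-w // 8], (w, h)),  # crop + rescale
    ]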
My problem is that the training loss decreases nicely, but the validation loss keeps increasing, and I don't know why.
Here is my solver file
net: "models/cropCenter/train_val.prototxt"
test_iter: 602
test_interval: 2000
base_lr: 0.01
lr_policy: "step"
gamma: 0.1
stepsize: 25000
display: 20
max_iter: 111771
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "models/cropCenter/caffe_alexnet_cropCenter_train"
solver_mode: GPU
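(For reference, each test pass evaluates test_iter * test-phase batch_size samples; here 602 * 128 = 77,056, enough to cover the ~70k validation samples.)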
And here is a snapshot from the log file
I0309 18:31:48.157299 11157 sgd_solver.cpp:105] Iteration 7880, lr = 0.01
I0309 18:31:59.188576 11157 solver.cpp:218] Iteration 7900 (1.81303 iter/s, 11.0313s/20 iters), loss = 0.095763
I0309 18:31:59.188653 11157 solver.cpp:237] Train net output #0: loss = 0.0957631 (* 1 = 0.0957631 loss)
I0309 18:31:59.188664 11157 sgd_solver.cpp:105] Iteration 7900, lr = 0.01
I0309 18:32:10.279839 11157 solver.cpp:218] Iteration 7920 (1.80324 iter/s, 11.0911s/20 iters), loss = 0.0490094
I0309 18:32:10.279883 11157 solver.cpp:237] Train net output #0: loss = 0.0490094 (* 1 = 0.0490094 loss)
I0309 18:32:10.279912 11157 sgd_solver.cpp:105] Iteration 7920, lr = 0.01
I0309 18:32:21.498523 11157 solver.cpp:218] Iteration 7940 (1.78275 iter/s, 11.2186s/20 iters), loss = 0.0937675
I0309 18:32:21.498741 11157 solver.cpp:237] Train net output #0: loss = 0.0937675 (* 1 = 0.0937675 loss)
I0309 18:32:21.498785 11157 sgd_solver.cpp:105] Iteration 7940, lr = 0.01
I0309 18:32:32.785640 11157 solver.cpp:218] Iteration 7960 (1.77197 iter/s, 11.2869s/20 iters), loss = 0.0631587
I0309 18:32:32.785701 11157 solver.cpp:237] Train net output #0: loss = 0.0631588 (* 1 = 0.0631588 loss)
I0309 18:32:32.785713 11157 sgd_solver.cpp:105] Iteration 7960, lr = 0.01
I0309 18:32:41.650172 11157 solver.cpp:218] Iteration 7980 (2.25621 iter/s, 8.86444s/20 iters), loss = 0.0407214
I0309 18:32:41.650233 11157 solver.cpp:237] Train net output #0: loss = 0.0407215 (* 1 = 0.0407215 loss)
I0309 18:32:41.650245 11157 sgd_solver.cpp:105] Iteration 7980, lr = 0.01
I0309 18:32:49.210865 11157 solver.cpp:330] Iteration 8000, Testing net (#0)
I0309 18:34:55.362457 11157 solver.cpp:397] Test net output #0: accuracy = 0.524748
I0309 18:34:55.362599 11157 solver.cpp:397] Test net output #1: loss = 2.43989 (* 1 = 2.43989 loss)
I0309 18:34:55.511060 11166 data_layer.cpp:73] Restarting data prefetching from start.
I0309 18:34:55.662698 11157 solver.cpp:218] Iteration 8000 (0.14924 iter/s, 134.012s/20 iters), loss = 0.0704969
I0309 18:34:55.662761 11157 solver.cpp:237] Train net output #0: loss = 0.070497 (* 1 = 0.070497 loss)
I0309 18:34:55.662773 11157 sgd_solver.cpp:105] Iteration 8000, lr = 0.01
I0309 18:35:06.450870 11157 solver.cpp:218] Iteration 8020 (1.8539 iter/s, 10.7881s/20 iters), loss = 0.100138
I0309 18:35:06.450949 11157 solver.cpp:237] Train net output #0: loss = 0.100138 (* 1 = 0.100138 loss)
I0309 18:35:06.450960 11157 sgd_solver.cpp:105] Iteration 8020, lr = 0.01
I0309 18:35:07.103420 11157 blocking_queue.cpp:49] Waiting for data
I0309 18:35:15.513669 11157 solver.cpp:218] Iteration 8040 (2.20685 iter/s, 9.06268s/20 iters), loss = 0.0950916
I am classifying 3 classes. Any help is really appreciated. Thank you.
I'm trying to implement a Siamese network in Caffe composed of two ImageNet-style branches that don't share weights. Basically, I give each network an image and, at the end, compute the distance between the two as a similarity measure; my prototxt is below. My main question is: what should I set "num_output" to? I have only 2 classes for training: 0 if the images are not alike, 1 if they are similar.
name: "Siamese_ImageNet"
layers {
name: "data"
type: IMAGE_DATA
top: "data"
top: "label"
image_data_param {
source: "train1.txt"
batch_size: 32
new_height: 256
new_width: 256
}
include: { phase: TRAIN }
}
layers {
name: "data"
type: IMAGE_DATA
top: "data"
top: "label"
image_data_param {
source: "test1.txt"
batch_size: 32
new_height: 256
new_width: 256
}
include: { phase: TEST }
}
layers {
name: "data_p"
type: IMAGE_DATA
top: "data_p"
top: "label_p"
image_data_param {
source: "train2.txt"
batch_size: 32
new_height: 256
new_width: 256
}
include: { phase: TRAIN }
}
layers {
name: "data_p"
type: IMAGE_DATA
top: "data_p"
top: "label_p"
image_data_param {
source: "test2.txt"
batch_size: 32
new_height: 256
new_width: 256
}
include: { phase: TEST }
}
layers {
name: "conv1"
type: CONVOLUTION
bottom: "data"
top: "conv1"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
name: "relu1"
type: RELU
bottom: "conv1"
top: "conv1"
}
layers {
name: "pool1"
type: POOLING
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "norm1"
type: LRN
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layers {
name: "conv2"
type: CONVOLUTION
bottom: "norm1"
top: "conv2"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu2"
type: RELU
bottom: "conv2"
top: "conv2"
}
layers {
name: "pool2"
type: POOLING
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "norm2"
type: LRN
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layers {
name: "conv3"
type: CONVOLUTION
bottom: "norm2"
top: "conv3"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
name: "relu3"
type: RELU
bottom: "conv3"
top: "conv3"
}
layers {
name: "conv4"
type: CONVOLUTION
bottom: "conv3"
top: "conv4"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu4"
type: RELU
bottom: "conv4"
top: "conv4"
}
layers {
name: "conv5"
type: CONVOLUTION
bottom: "conv4"
top: "conv5"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu5"
type: RELU
bottom: "conv5"
top: "conv5"
}
layers {
name: "pool5"
type: POOLING
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "fc6"
type: INNER_PRODUCT
bottom: "pool5"
top: "fc6"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu6"
type: RELU
bottom: "fc6"
top: "fc6"
}
layers {
name: "drop6"
type: DROPOUT
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layers {
name: "fc7"
type: INNER_PRODUCT
bottom: "fc6"
top: "fc7"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 2
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu7"
type: RELU
bottom: "fc7"
top: "fc7"
}
layers {
name: "drop7"
type: DROPOUT
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layers {
name: "conv1_p"
type: CONVOLUTION
bottom: "data_p"
top: "conv1_p"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
name: "relu1_p"
type: RELU
bottom: "conv1_p"
top: "conv1_p"
}
layers {
name: "pool1_p"
type: POOLING
bottom: "conv1_p"
top: "pool1_p"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "norm1_p"
type: LRN
bottom: "pool1_p"
top: "norm1_p"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layers {
name: "conv2_p"
type: CONVOLUTION
bottom: "norm1_p"
top: "conv2_p"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu2_p"
type: RELU
bottom: "conv2_p"
top: "conv2_p"
}
layers {
name: "pool2_p"
type: POOLING
bottom: "conv2_p"
top: "pool2_p"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "norm2_p"
type: LRN
bottom: "pool2_p"
top: "norm2_p"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layers {
name: "conv3_p"
type: CONVOLUTION
bottom: "norm2_p"
top: "conv3_p"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
name: "relu3_p"
type: RELU
bottom: "conv3_p"
top: "conv3_p"
}
layers {
name: "conv4_p"
type: CONVOLUTION
bottom: "conv3_p"
top: "conv4_p"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu4_p"
type: RELU
bottom: "conv4_p"
top: "conv4_p"
}
layers {
name: "conv5_p"
type: CONVOLUTION
bottom: "conv4_p"
top: "conv5_p"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu5_p"
type: RELU
bottom: "conv5_p"
top: "conv5_p"
}
layers {
name: "pool5_p"
type: POOLING
bottom: "conv5_p"
top: "pool5_p"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "fc6_p"
type: INNER_PRODUCT
bottom: "pool5_p"
top: "fc6_p"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu6_p"
type: RELU
bottom: "fc6_p"
top: "fc6_p"
}
layers {
name: "drop6_p"
type: DROPOUT
bottom: "fc6_p"
top: "fc6_p"
dropout_param {
dropout_ratio: 0.5
}
}
layers {
name: "fc7_p"
type: INNER_PRODUCT
bottom: "fc6_p"
top: "fc7_p"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 2
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu7_p"
type: RELU
bottom: "fc7_p"
top: "fc7_p"
}
layers {
name: "drop7_p"
type: DROPOUT
bottom: "fc7_p"
top: "fc7_p"
dropout_param {
dropout_ratio: 0.5
}
}
layers {
name: "loss"
type: CONTRASTIVE_LOSS
contrastive_loss_param {
margin: 1.0
}
bottom: "fc7"
bottom: "fc7_p"
bottom: "label"
top: "loss"
}
My training file structure:
0 is dissimilar, 1 is similar
train1.txt:
/aer/img1_1.jpg 0
/aer/img1_2.jpg 1
/aer/img1_3.jpg 1
train2.txt:
/tpd/img2_1.jpg 0
/tpd/img2_2.jpg 1
/tpd/img2_3.jpg 1
What should I set my "num_output" to?
Before deciding what num_output should be, let's explain what it means. You can view the two sides of the Siamese network, data -> fc7 and data_p -> fc7_p, as two feature extractors. Each one extracts a feature vector, e.g. fc7 or fc7_p, from the images in the corresponding data layer. So num_output defines the dimension of the extracted feature vector.
During training, the ContrastiveLoss layer tries to minimize the distance between the two extracted feature vectors when the images they represent are similar (label == 1) and to maximize it when they are dissimilar (label == 0). In other words, the smaller the distance between the feature vectors, the more similar the images are.
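Concretely, with d_n the L2 distance between the n-th pair of feature vectors and m the margin from contrastive_loss_param, Caffe's ContrastiveLoss layer minimizes

E = \frac{1}{2N} \sum_{n=1}^{N} \left[ y_n d_n^2 + (1 - y_n) \max(m - d_n, 0)^2 \right]

so similar pairs (y_n = 1) are pulled together while dissimilar pairs (y_n = 0) are pushed at least m apart.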
So what's the optimal dimension of the feature vector to best capture the information indicating similarity? Or, what should you set num_output to? There may not be an exact value; it depends on the encoding quality of the feature extractor (you may view the feature as a code for the image) and on how hard it is to recognize the similarity of the images. Basically, if the network (feature extractor) is deep and the similarity is not too hard to recognize, you can choose a relatively small num_output, e.g. 200, since a larger network may encode the feature well and make it more discriminative. If not, you can try a larger value, e.g. 500 or 1000, or try a more complicated network.
If you want to try a MultinomialLogisticLoss instead of the ContrastiveLoss layer, you should first fuse the two feature vectors fc7 and fc7_p into one using a layer like CONCAT, and then feed the result into a SOFTMAX_LOSS layer, like this:
...#original layers
layers {
name: "concat"
type: CONCAT
bottom: "fc7"
bottom: "fc7_p"
top: "fc_concat" # concatenate fc7 and fc7_p along channel axis
}
layer {
name: "fc_cls"
type: INNER_PRODUCT
bottom: "fc_concat"
top: "fc_cls"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 2 # a binary classification problem in this case
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "accuracy"
type: ACCURACY
bottom: "fc_cls"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: SOFTMAX_LOSS
bottom: "fc_cls"
bottom: "label"
top: "loss"
}
Update
Which is the better method for comparing similarity at deployment: contrastive loss or softmax loss?
Softmax loss is simple and easy to deploy, but it can only give you a binary prediction, namely similar or dissimilar. The probability distribution it produces over the two classes (similar, dissimilar) is often too hard (non-uniform), e.g. [0.9*, 0.0*] or [0.0*, 0.9*], which in many cases will not reflect the true degree of similarity well.
With contrastive loss, on the other hand, you get a discriminative feature vector for each image, and you can use the vectors to compute a probability of similarity, as the CVPR 2005 paper "Learning a Similarity Metric Discriminatively, with Application to Face Verification" does in Section 4.1 (the key point is to compute a multivariate normal density using the feature vectors generated from images of the same subject). You can also use a threshold to control the model's false positive and false negative rates, and sweep it to obtain a ROC curve for better evaluation.
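A minimal deployment sketch along those lines (the 0.5 threshold is illustrative; in practice it would be tuned on a validation set, e.g. while sweeping out the ROC curve):

import numpy as np

def is_similar(feat_a, feat_b, threshold=0.5):
    # feat_a, feat_b: feature vectors extracted from fc7 / fc7_p
    d = np.linalg.norm(feat_a - feat_b)  # smaller distance => more similar
    return d < threshold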
By the way, for more CNN architectures for predicting similarity, you can refer to the CVPR 2015 paper "Learning to Compare Image Patches via Convolutional Neural Networks".
Just to correct Dale's great answer above for Caffe's extremely sensitive syntax, for newcomers who get stuck like myself, here are a few corrections (layers to layer, some added quotes, removal of comments, and valid capitalization):
layer {
name: "concat"
type: "Concat"
bottom: "fc7"
bottom: "fc7_p"
top: "fc_concat"
}
layer {
name: "fc_cls"
type: "InnerProduct"
bottom: "fc_concat"
top: "fc_cls"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc_cls"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc_cls"
bottom: "label"
top: "loss"
}
I believe num_output defines the dimension of the extracted feature vector; the extracted features can then be used to compute the L2 distance. If the L2 distance is greater than 1, the images belong to different classes, and if it is close to 0, the images are similar. For the rest, Dale's answer is perfect.
The initial loss computed when my model runs on the image dataset stays the same every time I run Caffe. Isn't that strange, since the initial loss should differ (at least slightly) from run to run?
The loss also remains constant across further iterations when I use the SoftmaxWithLoss function in the loss layer, and likewise when I set lr_mult: 0 in the deconvolution layer. If I set lr_mult: 1, the loss value changes, but it settles back to the initial loss value after a few thousand iterations.
Any ideas? Any help in this regard is highly appreciated.
The following is my solver file:
test_iter: 100
test_interval: 100 # py solving tests
display: 100
#average_loss: 100
lr_policy: "fixed"
base_lr: 0.0000001
momentum: 0.5
iter_size: 1
# base_lr: 1e-9
# momentum: 0.99
# iter_size: 1
max_iter: 1000000
#weight_decay: 0.0005
snapshot: 1000
snapshot_diff: true
#test_initialization: false
solver_mode: GPU
The following is the architecture of my convolutional neural network.
# Input 128 640 3
# Conv1 (kernel=3) 126 638 64
# ReLU
# Conv2 (kernel=3) 124 636 128
# ReLU
# Conv3 (kernel=3) 122 634 128
# ReLU
# Pool1 (kernel=2) 61 317 128
# Conv4 (kernel=4) 58 314 128
# ReLU
# Conv5 (kernel=4) 55 311 256
# ReLU
# Conv6 (kernel=4) 52 308 128
# ReLU
# Pool2 (kernel=2) 26 154 128
# Conv7 (kernel=4,stride=2,pad=3) 15 79 5
# ReLU
# Decon1 (kernel=16,stride=8,pad=0) 128 640 5
# ReLU
# Loss
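# Sizes above follow out = floor((in + 2*pad - kernel)/stride) + 1 for convolution
# (pooling rounds up instead) and out = stride*(in - 1) + kernel - 2*pad for
# deconvolution, e.g. Decon1: 8*(15 - 1) + 16 - 0 = 128.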
name: "Conv-Deconv-Net"
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TRAIN
}
data_param {
source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_IMG_LMDB_olpywm"
batch_size: 4
backend: LMDB
}
}
layer {
name: "label"
type: "Data"
top: "label"
include {
phase: TRAIN
}
data_param {
source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_LBL_LMDB_olpywm"
batch_size: 4
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TEST
}
data_param {
source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_IMG_LMDB_olpywm"
batch_size: 4
backend: LMDB
}
}
layer {
name: "label"
type: "Data"
top: "label"
include {
phase: TEST
}
data_param {
source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_LBL_LMDB_olpywm"
batch_size: 4
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
pad: 0
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "relu1"
}
layer {
name: "conv2"
type: "Convolution"
bottom: "relu1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "relu2"
}
layer {
name: "conv3"
type: "Convolution"
bottom: "relu2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "relu3"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "relu3"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4"
type: "Convolution"
bottom: "pool1"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 4
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "relu4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "relu4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 4
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "relu5"
}
layer {
name: "conv6"
type: "Convolution"
bottom: "relu5"
top: "conv6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 4
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "relu6"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "relu6"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv7"
type: "Convolution"
bottom: "pool2"
top: "conv7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 5
pad: 3
kernel_size: 4
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.001
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "conv7"
top: "relu7"
}
layer {
name: "deconv1"
type: "Deconvolution"
bottom: "relu7"
top: "deconv1"
param {
lr_mult: 1
}
convolution_param {
num_output: 5
bias_term: false
kernel_size: 16
stride: 8
pad: 0
weight_filler {
type: "bilinear"
}
}
}
layer {
name: "relu8"
type: "ReLU"
bottom: "deconv1"
top: "relu8"
}
#layer {
# name: "crop"
# type: "Crop"
# bottom: "deconv3"
# bottom: "data"
# top: "score"
#}
layer {
name: "prob"
type: "SoftmaxWithLoss"
bottom: "relu8"
bottom: "label"
top: "loss"
loss_param {
# ignore_label: 255
# normalize: true
normalize: false
}
}
It seems your base_lr is too small. Keep it at 0.00001 in the beginning and try keeping your momentum at 0.9. If your learning rate is too small, convergence will be very slow; if it is too high, gradient descent will overshoot the local minima (that's when you see the loss shoot up). So you have to arrive at an optimal value iteratively; there is no magic number for this.
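For reference, Caffe's SGD solver couples the two settings via v_{t+1} = \mu v_t - \alpha \nabla L(w_t) and w_{t+1} = w_t + v_{t+1}, where \alpha is the learning rate and \mu the momentum, so a high momentum amplifies whatever step size you choose.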
I'm working with the Caffe framework and I would like to train the network below. When I execute the command:
caffe train --solver solver.prototxt
it throws this error:
F0802 14:31:54.506695 28038 insert_splits.cpp:29] Unknown bottom blob 'image' (layer 'conv1', bottom index 0)
*** Check failure stack trace: ***
# 0x7ff2941c3f9d google::LogMessage::Fail()
# 0x7ff2941c5e03 google::LogMessage::SendToLog()
# 0x7ff2941c3b2b google::LogMessage::Flush()
# 0x7ff2941c67ee google::LogMessageFatal::~LogMessageFatal()
# 0x7ff2947cedbe caffe::InsertSplits()
# 0x7ff2948306de caffe::Net<>::Init()
# 0x7ff294833a81 caffe::Net<>::Net()
# 0x7ff29480ce6a caffe::Solver<>::InitTestNets()
# 0x7ff29480ee85 caffe::Solver<>::Init()
# 0x7ff29480f19a caffe::Solver<>::Solver()
# 0x7ff2947f4343 caffe::Creator_SGDSolver<>()
# 0x40b1a0 (unknown)
# 0x407373 (unknown)
# 0x7ff292e40741 __libc_start_main
# 0x407b79 (unknown)
Aborted (core dumped)
The network definition (train2.prototxt):
name: "xxxxxx"
layer {
name: "image"
type: "HDF5Data"
top: "image"
top: "label"
hdf5_data_param {
source: "h5a.train.h5.txt"
batch_size: 64
}
include {
phase: TRAIN
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "image"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "norm1"
type: "LRN"
bottom: "conv1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "norm1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm2"
type: "LRN"
bottom: "pool1"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "norm2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv3"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "improd3"
type: "InnerProduct"
bottom: "pool2"
top: "improd3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1000
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "improd3"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "improd3"
bottom: "label"
top: "loss"
}
The solver.prototxt:
net: "train2.prototxt"
test_iter: 100
test_interval: 1000
# lr for fine-tuning should be lower than when starting from scratch
base_lr: 0.001
lr_policy: "step"
gamma: 0.1
# stepsize should also be lower, as we're closer to being done
stepsize: 20000
display: 20
max_iter: 100000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "caffe"
solver_mode: CPU
I'm stuck and can't start training the network because of this problem.
This happens because, even though you are executing the TRAIN phase, the TEST phase is also used for validation. Since there is no input data layer for the TEST phase, the conv1 layer cannot find its input blob image. The TEST phase is invoked because you have defined test_* parameters in the solver and phase: TEST in some of the layers in train2.prototxt. Removing those parameters from the solver and the layers representing the TEST phase will let you run the training without any issues.
I had the Caffe C++ example program working on my computer, but after recently recompiling Caffe, I've encountered this error when I try to run the program:
[libprotobuf ERROR google/protobuf/text_format.cc:245] Error parsing
text-format caffe.NetParameter: 2:4: Message type "caffe.NetParameter"
has no field named "net".
upgrade_proto.cpp:928] Check failed: ReadProtoFromTextFile(param_file,
param) Failed to parse NetParameter file:
/home/jack/Desktop/beeshiny/deploy.prototxt
Am I missing something or has the syntax of the prototxt files been changed? My deploy.prototxt file (that I pass to the C++ program) looks like this:
# The train/test net protocol buffer definition
net: "/home/jack/Desktop/beeshiny/deploy_arch.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "lenet"
# solver mode: CPU or GPU
solver_mode: CPU
The contents of the deploy_arch.prototxt file referenced in the prototxt file above:
name: "LeNet"
input: "data"
input_shape {
dim: 10
dim: 1
dim: 24
dim: 24
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 50
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool2"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "loss"
type: "Softmax"
bottom: "ip2"
top: "loss"
}
I don't understand why this has suddenly stopped working, unless an update has made my prototxt file obsolete.
I solved my problem by adding caffe/python to $PYTHONPATH.
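For anyone hitting the same error: that means something like export PYTHONPATH=/path/to/caffe/python:$PYTHONPATH, where /path/to/caffe is wherever your Caffe checkout lives.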