We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hi, I'm trying to train my own data. This is the solver : net: "train_val.prototxt" #test_initialization: false #test_iter: 100 #test_interval: 1000 display: 20 average_loss: 20 base_lr: 0.000001 lr_policy: "poly" power: 1.0 max_iter: 500 momentum: 0.9 weight_decay: 0.0001 snapshot: 100 snapshot_prefix: "mobilenet"
this is the train_val : name: "MOBILENET"
layer { name: "data" type: "ImageData" top: "data" top: "label" include { phase: TRAIN } transform_param { mean_file: "imagenet_mean.binaryproto" mirror:false } image_data_param { source: "./train.txt" batch_size: 16 new_height: 256 new_width: 256 root_folder: "/" } }
layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 32 bias_term: false pad: 1 kernel_size: 3 stride: 2 weight_filler { type: "msra" } } } layer { name: "conv1/bn" type: "BatchNorm" bottom: "conv1" top: "conv1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv1/scale" type: "Scale" bottom: "conv1" top: "conv1" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "conv2_1/dw" type: "Convolution" bottom: "conv1" top: "conv2_1/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 32 bias_term: false pad: 1 kernel_size: 3 group: 32 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv2_1/dw/bn" type: "BatchNorm" bottom: "conv2_1/dw" top: "conv2_1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv2_1/dw/scale" type: "Scale" bottom: "conv2_1/dw" top: "conv2_1/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu2_1/dw" type: "ReLU" bottom: "conv2_1/dw" top: "conv2_1/dw" } layer { name: "conv2_1/sep" type: "Convolution" bottom: "conv2_1/dw" top: "conv2_1/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 64 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv2_1/sep/bn" type: "BatchNorm" bottom: "conv2_1/sep" top: "conv2_1/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv2_1/sep/scale" type: "Scale" bottom: "conv2_1/sep" top: "conv2_1/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu2_1/sep" type: "ReLU" bottom: "conv2_1/sep" top: "conv2_1/sep" } layer { name: "conv2_2/dw" type: "Convolution" bottom: "conv2_1/sep" top: "conv2_2/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 64 bias_term: false pad: 1 kernel_size: 3 group: 64 engine: CAFFE stride: 2 weight_filler { type: "msra" } } } layer { name: "conv2_2/dw/bn" type: "BatchNorm" bottom: "conv2_2/dw" top: "conv2_2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv2_2/dw/scale" type: "Scale" bottom: "conv2_2/dw" top: "conv2_2/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu2_2/dw" type: "ReLU" bottom: "conv2_2/dw" top: "conv2_2/dw" } layer { name: "conv2_2/sep" type: "Convolution" bottom: "conv2_2/dw" top: "conv2_2/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 128 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv2_2/sep/bn" type: "BatchNorm" bottom: "conv2_2/sep" top: "conv2_2/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv2_2/sep/scale" type: "Scale" bottom: "conv2_2/sep" top: "conv2_2/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu2_2/sep" type: "ReLU" bottom: "conv2_2/sep" top: "conv2_2/sep" } layer { name: "conv3_1/dw" type: "Convolution" bottom: "conv2_2/sep" top: "conv3_1/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 128 bias_term: false pad: 1 kernel_size: 3 group: 128 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv3_1/dw/bn" type: "BatchNorm" bottom: "conv3_1/dw" top: "conv3_1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv3_1/dw/scale" type: "Scale" bottom: "conv3_1/dw" top: "conv3_1/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu3_1/dw" type: "ReLU" bottom: "conv3_1/dw" top: "conv3_1/dw" } layer { name: "conv3_1/sep" type: "Convolution" bottom: "conv3_1/dw" top: "conv3_1/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 128 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv3_1/sep/bn" type: "BatchNorm" bottom: "conv3_1/sep" top: "conv3_1/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv3_1/sep/scale" type: "Scale" bottom: "conv3_1/sep" top: "conv3_1/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu3_1/sep" type: "ReLU" bottom: "conv3_1/sep" top: "conv3_1/sep" } layer { name: "conv3_2/dw" type: "Convolution" bottom: "conv3_1/sep" top: "conv3_2/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 128 bias_term: false pad: 1 kernel_size: 3 group: 128 engine: CAFFE stride: 2 weight_filler { type: "msra" } } } layer { name: "conv3_2/dw/bn" type: "BatchNorm" bottom: "conv3_2/dw" top: "conv3_2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv3_2/dw/scale" type: "Scale" bottom: "conv3_2/dw" top: "conv3_2/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu3_2/dw" type: "ReLU" bottom: "conv3_2/dw" top: "conv3_2/dw" } layer { name: "conv3_2/sep" type: "Convolution" bottom: "conv3_2/dw" top: "conv3_2/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 256 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv3_2/sep/bn" type: "BatchNorm" bottom: "conv3_2/sep" top: "conv3_2/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv3_2/sep/scale" type: "Scale" bottom: "conv3_2/sep" top: "conv3_2/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu3_2/sep" type: "ReLU" bottom: "conv3_2/sep" top: "conv3_2/sep" } layer { name: "conv4_1/dw" type: "Convolution" bottom: "conv3_2/sep" top: "conv4_1/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 256 bias_term: false pad: 1 kernel_size: 3 group: 256 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv4_1/dw/bn" type: "BatchNorm" bottom: "conv4_1/dw" top: "conv4_1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv4_1/dw/scale" type: "Scale" bottom: "conv4_1/dw" top: "conv4_1/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu4_1/dw" type: "ReLU" bottom: "conv4_1/dw" top: "conv4_1/dw" } layer { name: "conv4_1/sep" type: "Convolution" bottom: "conv4_1/dw" top: "conv4_1/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 256 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv4_1/sep/bn" type: "BatchNorm" bottom: "conv4_1/sep" top: "conv4_1/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv4_1/sep/scale" type: "Scale" bottom: "conv4_1/sep" top: "conv4_1/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu4_1/sep" type: "ReLU" bottom: "conv4_1/sep" top: "conv4_1/sep" } layer { name: "conv4_2/dw" type: "Convolution" bottom: "conv4_1/sep" top: "conv4_2/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 256 bias_term: false pad: 1 kernel_size: 3 group: 256 engine: CAFFE stride: 2 weight_filler { type: "msra" } } } layer { name: "conv4_2/dw/bn" type: "BatchNorm" bottom: "conv4_2/dw" top: "conv4_2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv4_2/dw/scale" type: "Scale" bottom: "conv4_2/dw" top: "conv4_2/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu4_2/dw" type: "ReLU" bottom: "conv4_2/dw" top: "conv4_2/dw" } layer { name: "conv4_2/sep" type: "Convolution" bottom: "conv4_2/dw" top: "conv4_2/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv4_2/sep/bn" type: "BatchNorm" bottom: "conv4_2/sep" top: "conv4_2/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv4_2/sep/scale" type: "Scale" bottom: "conv4_2/sep" top: "conv4_2/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu4_2/sep" type: "ReLU" bottom: "conv4_2/sep" top: "conv4_2/sep" } layer { name: "conv5_1/dw" type: "Convolution" bottom: "conv4_2/sep" top: "conv5_1/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_1/dw/bn" type: "BatchNorm" bottom: "conv5_1/dw" top: "conv5_1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_1/dw/scale" type: "Scale" bottom: "conv5_1/dw" top: "conv5_1/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_1/dw" type: "ReLU" bottom: "conv5_1/dw" top: "conv5_1/dw" } layer { name: "conv5_1/sep" type: "Convolution" bottom: "conv5_1/dw" top: "conv5_1/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_1/sep/bn" type: "BatchNorm" bottom: "conv5_1/sep" top: "conv5_1/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_1/sep/scale" type: "Scale" bottom: "conv5_1/sep" top: "conv5_1/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_1/sep" type: "ReLU" bottom: "conv5_1/sep" top: "conv5_1/sep" } layer { name: "conv5_2/dw" type: "Convolution" bottom: "conv5_1/sep" top: "conv5_2/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_2/dw/bn" type: "BatchNorm" bottom: "conv5_2/dw" top: "conv5_2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_2/dw/scale" type: "Scale" bottom: "conv5_2/dw" top: "conv5_2/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_2/dw" type: "ReLU" bottom: "conv5_2/dw" top: "conv5_2/dw" } layer { name: "conv5_2/sep" type: "Convolution" bottom: "conv5_2/dw" top: "conv5_2/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_2/sep/bn" type: "BatchNorm" bottom: "conv5_2/sep" top: "conv5_2/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_2/sep/scale" type: "Scale" bottom: "conv5_2/sep" top: "conv5_2/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_2/sep" type: "ReLU" bottom: "conv5_2/sep" top: "conv5_2/sep" } layer { name: "conv5_3/dw" type: "Convolution" bottom: "conv5_2/sep" top: "conv5_3/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_3/dw/bn" type: "BatchNorm" bottom: "conv5_3/dw" top: "conv5_3/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_3/dw/scale" type: "Scale" bottom: "conv5_3/dw" top: "conv5_3/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_3/dw" type: "ReLU" bottom: "conv5_3/dw" top: "conv5_3/dw" } layer { name: "conv5_3/sep" type: "Convolution" bottom: "conv5_3/dw" top: "conv5_3/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_3/sep/bn" type: "BatchNorm" bottom: "conv5_3/sep" top: "conv5_3/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_3/sep/scale" type: "Scale" bottom: "conv5_3/sep" top: "conv5_3/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_3/sep" type: "ReLU" bottom: "conv5_3/sep" top: "conv5_3/sep" } layer { name: "conv5_4/dw" type: "Convolution" bottom: "conv5_3/sep" top: "conv5_4/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_4/dw/bn" type: "BatchNorm" bottom: "conv5_4/dw" top: "conv5_4/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_4/dw/scale" type: "Scale" bottom: "conv5_4/dw" top: "conv5_4/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_4/dw" type: "ReLU" bottom: "conv5_4/dw" top: "conv5_4/dw" } layer { name: "conv5_4/sep" type: "Convolution" bottom: "conv5_4/dw" top: "conv5_4/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_4/sep/bn" type: "BatchNorm" bottom: "conv5_4/sep" top: "conv5_4/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_4/sep/scale" type: "Scale" bottom: "conv5_4/sep" top: "conv5_4/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_4/sep" type: "ReLU" bottom: "conv5_4/sep" top: "conv5_4/sep" } layer { name: "conv5_5/dw" type: "Convolution" bottom: "conv5_4/sep" top: "conv5_5/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_5/dw/bn" type: "BatchNorm" bottom: "conv5_5/dw" top: "conv5_5/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_5/dw/scale" type: "Scale" bottom: "conv5_5/dw" top: "conv5_5/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_5/dw" type: "ReLU" bottom: "conv5_5/dw" top: "conv5_5/dw" } layer { name: "conv5_5/sep" type: "Convolution" bottom: "conv5_5/dw" top: "conv5_5/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_5/sep/bn" type: "BatchNorm" bottom: "conv5_5/sep" top: "conv5_5/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_5/sep/scale" type: "Scale" bottom: "conv5_5/sep" top: "conv5_5/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_5/sep" type: "ReLU" bottom: "conv5_5/sep" top: "conv5_5/sep" } layer { name: "conv5_6/dw" type: "Convolution" bottom: "conv5_5/sep" top: "conv5_6/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512 engine: CAFFE stride: 2 weight_filler { type: "msra" } } } layer { name: "conv5_6/dw/bn" type: "BatchNorm" bottom: "conv5_6/dw" top: "conv5_6/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_6/dw/scale" type: "Scale" bottom: "conv5_6/dw" top: "conv5_6/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_6/dw" type: "ReLU" bottom: "conv5_6/dw" top: "conv5_6/dw" } layer { name: "conv5_6/sep" type: "Convolution" bottom: "conv5_6/dw" top: "conv5_6/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 1024 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv5_6/sep/bn" type: "BatchNorm" bottom: "conv5_6/sep" top: "conv5_6/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv5_6/sep/scale" type: "Scale" bottom: "conv5_6/sep" top: "conv5_6/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu5_6/sep" type: "ReLU" bottom: "conv5_6/sep" top: "conv5_6/sep" } layer { name: "conv6/dw" type: "Convolution" bottom: "conv5_6/sep" top: "conv6/dw" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 1024 bias_term: false pad: 1 kernel_size: 3 group: 1024 engine: CAFFE stride: 1 weight_filler { type: "msra" } } } layer { name: "conv6/dw/bn" type: "BatchNorm" bottom: "conv6/dw" top: "conv6/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv6/dw/scale" type: "Scale" bottom: "conv6/dw" top: "conv6/dw" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu6/dw" type: "ReLU" bottom: "conv6/dw" top: "conv6/dw" } layer { name: "conv6/sep" type: "Convolution" bottom: "conv6/dw" top: "conv6/sep" param { lr_mult: 1 decay_mult: 1 } convolution_param { num_output: 1024 bias_term: false pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "msra" } } } layer { name: "conv6/sep/bn" type: "BatchNorm" bottom: "conv6/sep" top: "conv6/sep" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } batch_norm_param { use_global_stats: true eps: 1e-5 } } layer { name: "conv6/sep/scale" type: "Scale" bottom: "conv6/sep" top: "conv6/sep" param { lr_mult: 1 decay_mult: 0 } param { lr_mult: 1 decay_mult: 0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "relu6/sep" type: "ReLU" bottom: "conv6/sep" top: "conv6/sep" } layer { name: "pool6" type: "Pooling" bottom: "conv6/sep" top: "pool6" pooling_param { pool: AVE global_pooling: true } } layer { name: "fc7" type: "Convolution" bottom: "pool6" top: "fc7" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 42 kernel_size: 1 weight_filler { type: "msra" } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc7" bottom: "label" top: "loss" } layer { name: "top1/acc" type: "Accuracy" bottom: "fc7" bottom: "label" top: "top1/acc" include { phase: TEST } } layer { name: "top5/acc" type: "Accuracy" bottom: "fc7" bottom: "label" top: "top5/acc" include { phase: TEST } accuracy_param { top_k: 5 } }
I tried also to replace the mean as you said in your tutorial but also I get
I0421 13:23:50.226541 2808 solver.cpp:218] Iteration 0 (-1.82169e-44 iter/s, 40.202s/20 iters), loss = -nan I0421 13:23:50.226686 2808 solver.cpp:237] Train net output #0: loss = -nan (* 1 = -nan loss) I0421 13:23:50.226697 2808 sgd_solver.cpp:105] Iteration 0, lr = 1e-06
I really don't understand why loss goes to -nan
The text was updated successfully, but these errors were encountered:
I got this problem too, have you find the reason?
Sorry, something went wrong.
@Jacoppy @zhangnn016 try to remove all use_global_stats: true in the batch_norm_param, let the BatchNorm layers be in a default value
use_global_stats: true
batch_norm_param
Thank you very much, it works for me!
No branches or pull requests
Hi,
I'm trying to train my own data.
This is the solver :
net: "train_val.prototxt"
#test_initialization: false
#test_iter: 100
#test_interval: 1000
display: 20
average_loss: 20
base_lr: 0.000001
lr_policy: "poly"
power: 1.0
max_iter: 500
momentum: 0.9
weight_decay: 0.0001
snapshot: 100
snapshot_prefix: "mobilenet"
this is the train_val :
name: "MOBILENET"
transform_param {
scale: 0.017
mirror: false
crop_size: 224
mean_value: [103.94,116.78,123.68]
}
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mean_file: "imagenet_mean.binaryproto"
mirror:false
}
image_data_param {
source: "./train.txt"
batch_size: 16
new_height: 256
new_width: 256
root_folder: "/"
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv1/bn"
type: "BatchNorm"
bottom: "conv1"
top: "conv1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv1/scale"
type: "Scale"
bottom: "conv1"
top: "conv1"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "conv2_1/dw"
type: "Convolution"
bottom: "conv1"
top: "conv2_1/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
group: 32
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv2_1/dw/bn"
type: "BatchNorm"
bottom: "conv2_1/dw"
top: "conv2_1/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv2_1/dw/scale"
type: "Scale"
bottom: "conv2_1/dw"
top: "conv2_1/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu2_1/dw"
type: "ReLU"
bottom: "conv2_1/dw"
top: "conv2_1/dw"
}
layer {
name: "conv2_1/sep"
type: "Convolution"
bottom: "conv2_1/dw"
top: "conv2_1/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 64
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv2_1/sep/bn"
type: "BatchNorm"
bottom: "conv2_1/sep"
top: "conv2_1/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv2_1/sep/scale"
type: "Scale"
bottom: "conv2_1/sep"
top: "conv2_1/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu2_1/sep"
type: "ReLU"
bottom: "conv2_1/sep"
top: "conv2_1/sep"
}
layer {
name: "conv2_2/dw"
type: "Convolution"
bottom: "conv2_1/sep"
top: "conv2_2/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 64
bias_term: false
pad: 1
kernel_size: 3
group: 64
engine: CAFFE
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv2_2/dw/bn"
type: "BatchNorm"
bottom: "conv2_2/dw"
top: "conv2_2/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv2_2/dw/scale"
type: "Scale"
bottom: "conv2_2/dw"
top: "conv2_2/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu2_2/dw"
type: "ReLU"
bottom: "conv2_2/dw"
top: "conv2_2/dw"
}
layer {
name: "conv2_2/sep"
type: "Convolution"
bottom: "conv2_2/dw"
top: "conv2_2/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv2_2/sep/bn"
type: "BatchNorm"
bottom: "conv2_2/sep"
top: "conv2_2/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv2_2/sep/scale"
type: "Scale"
bottom: "conv2_2/sep"
top: "conv2_2/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu2_2/sep"
type: "ReLU"
bottom: "conv2_2/sep"
top: "conv2_2/sep"
}
layer {
name: "conv3_1/dw"
type: "Convolution"
bottom: "conv2_2/sep"
top: "conv3_1/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
group: 128
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv3_1/dw/bn"
type: "BatchNorm"
bottom: "conv3_1/dw"
top: "conv3_1/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv3_1/dw/scale"
type: "Scale"
bottom: "conv3_1/dw"
top: "conv3_1/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu3_1/dw"
type: "ReLU"
bottom: "conv3_1/dw"
top: "conv3_1/dw"
}
layer {
name: "conv3_1/sep"
type: "Convolution"
bottom: "conv3_1/dw"
top: "conv3_1/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv3_1/sep/bn"
type: "BatchNorm"
bottom: "conv3_1/sep"
top: "conv3_1/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv3_1/sep/scale"
type: "Scale"
bottom: "conv3_1/sep"
top: "conv3_1/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu3_1/sep"
type: "ReLU"
bottom: "conv3_1/sep"
top: "conv3_1/sep"
}
layer {
name: "conv3_2/dw"
type: "Convolution"
bottom: "conv3_1/sep"
top: "conv3_2/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
group: 128
engine: CAFFE
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv3_2/dw/bn"
type: "BatchNorm"
bottom: "conv3_2/dw"
top: "conv3_2/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv3_2/dw/scale"
type: "Scale"
bottom: "conv3_2/dw"
top: "conv3_2/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu3_2/dw"
type: "ReLU"
bottom: "conv3_2/dw"
top: "conv3_2/dw"
}
layer {
name: "conv3_2/sep"
type: "Convolution"
bottom: "conv3_2/dw"
top: "conv3_2/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv3_2/sep/bn"
type: "BatchNorm"
bottom: "conv3_2/sep"
top: "conv3_2/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv3_2/sep/scale"
type: "Scale"
bottom: "conv3_2/sep"
top: "conv3_2/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu3_2/sep"
type: "ReLU"
bottom: "conv3_2/sep"
top: "conv3_2/sep"
}
layer {
name: "conv4_1/dw"
type: "Convolution"
bottom: "conv3_2/sep"
top: "conv4_1/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
group: 256
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv4_1/dw/bn"
type: "BatchNorm"
bottom: "conv4_1/dw"
top: "conv4_1/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv4_1/dw/scale"
type: "Scale"
bottom: "conv4_1/dw"
top: "conv4_1/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu4_1/dw"
type: "ReLU"
bottom: "conv4_1/dw"
top: "conv4_1/dw"
}
layer {
name: "conv4_1/sep"
type: "Convolution"
bottom: "conv4_1/dw"
top: "conv4_1/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv4_1/sep/bn"
type: "BatchNorm"
bottom: "conv4_1/sep"
top: "conv4_1/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv4_1/sep/scale"
type: "Scale"
bottom: "conv4_1/sep"
top: "conv4_1/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu4_1/sep"
type: "ReLU"
bottom: "conv4_1/sep"
top: "conv4_1/sep"
}
layer {
name: "conv4_2/dw"
type: "Convolution"
bottom: "conv4_1/sep"
top: "conv4_2/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
group: 256
engine: CAFFE
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv4_2/dw/bn"
type: "BatchNorm"
bottom: "conv4_2/dw"
top: "conv4_2/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv4_2/dw/scale"
type: "Scale"
bottom: "conv4_2/dw"
top: "conv4_2/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu4_2/dw"
type: "ReLU"
bottom: "conv4_2/dw"
top: "conv4_2/dw"
}
layer {
name: "conv4_2/sep"
type: "Convolution"
bottom: "conv4_2/dw"
top: "conv4_2/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv4_2/sep/bn"
type: "BatchNorm"
bottom: "conv4_2/sep"
top: "conv4_2/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv4_2/sep/scale"
type: "Scale"
bottom: "conv4_2/sep"
top: "conv4_2/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu4_2/sep"
type: "ReLU"
bottom: "conv4_2/sep"
top: "conv4_2/sep"
}
layer {
name: "conv5_1/dw"
type: "Convolution"
bottom: "conv4_2/sep"
top: "conv5_1/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_1/dw/bn"
type: "BatchNorm"
bottom: "conv5_1/dw"
top: "conv5_1/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_1/dw/scale"
type: "Scale"
bottom: "conv5_1/dw"
top: "conv5_1/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_1/dw"
type: "ReLU"
bottom: "conv5_1/dw"
top: "conv5_1/dw"
}
layer {
name: "conv5_1/sep"
type: "Convolution"
bottom: "conv5_1/dw"
top: "conv5_1/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_1/sep/bn"
type: "BatchNorm"
bottom: "conv5_1/sep"
top: "conv5_1/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_1/sep/scale"
type: "Scale"
bottom: "conv5_1/sep"
top: "conv5_1/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_1/sep"
type: "ReLU"
bottom: "conv5_1/sep"
top: "conv5_1/sep"
}
layer {
name: "conv5_2/dw"
type: "Convolution"
bottom: "conv5_1/sep"
top: "conv5_2/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_2/dw/bn"
type: "BatchNorm"
bottom: "conv5_2/dw"
top: "conv5_2/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_2/dw/scale"
type: "Scale"
bottom: "conv5_2/dw"
top: "conv5_2/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_2/dw"
type: "ReLU"
bottom: "conv5_2/dw"
top: "conv5_2/dw"
}
layer {
name: "conv5_2/sep"
type: "Convolution"
bottom: "conv5_2/dw"
top: "conv5_2/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_2/sep/bn"
type: "BatchNorm"
bottom: "conv5_2/sep"
top: "conv5_2/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_2/sep/scale"
type: "Scale"
bottom: "conv5_2/sep"
top: "conv5_2/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_2/sep"
type: "ReLU"
bottom: "conv5_2/sep"
top: "conv5_2/sep"
}
layer {
name: "conv5_3/dw"
type: "Convolution"
bottom: "conv5_2/sep"
top: "conv5_3/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_3/dw/bn"
type: "BatchNorm"
bottom: "conv5_3/dw"
top: "conv5_3/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_3/dw/scale"
type: "Scale"
bottom: "conv5_3/dw"
top: "conv5_3/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_3/dw"
type: "ReLU"
bottom: "conv5_3/dw"
top: "conv5_3/dw"
}
layer {
name: "conv5_3/sep"
type: "Convolution"
bottom: "conv5_3/dw"
top: "conv5_3/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_3/sep/bn"
type: "BatchNorm"
bottom: "conv5_3/sep"
top: "conv5_3/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_3/sep/scale"
type: "Scale"
bottom: "conv5_3/sep"
top: "conv5_3/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_3/sep"
type: "ReLU"
bottom: "conv5_3/sep"
top: "conv5_3/sep"
}
layer {
name: "conv5_4/dw"
type: "Convolution"
bottom: "conv5_3/sep"
top: "conv5_4/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_4/dw/bn"
type: "BatchNorm"
bottom: "conv5_4/dw"
top: "conv5_4/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_4/dw/scale"
type: "Scale"
bottom: "conv5_4/dw"
top: "conv5_4/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_4/dw"
type: "ReLU"
bottom: "conv5_4/dw"
top: "conv5_4/dw"
}
layer {
name: "conv5_4/sep"
type: "Convolution"
bottom: "conv5_4/dw"
top: "conv5_4/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_4/sep/bn"
type: "BatchNorm"
bottom: "conv5_4/sep"
top: "conv5_4/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_4/sep/scale"
type: "Scale"
bottom: "conv5_4/sep"
top: "conv5_4/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_4/sep"
type: "ReLU"
bottom: "conv5_4/sep"
top: "conv5_4/sep"
}
layer {
name: "conv5_5/dw"
type: "Convolution"
bottom: "conv5_4/sep"
top: "conv5_5/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_5/dw/bn"
type: "BatchNorm"
bottom: "conv5_5/dw"
top: "conv5_5/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_5/dw/scale"
type: "Scale"
bottom: "conv5_5/dw"
top: "conv5_5/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_5/dw"
type: "ReLU"
bottom: "conv5_5/dw"
top: "conv5_5/dw"
}
layer {
name: "conv5_5/sep"
type: "Convolution"
bottom: "conv5_5/dw"
top: "conv5_5/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_5/sep/bn"
type: "BatchNorm"
bottom: "conv5_5/sep"
top: "conv5_5/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_5/sep/scale"
type: "Scale"
bottom: "conv5_5/sep"
top: "conv5_5/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_5/sep"
type: "ReLU"
bottom: "conv5_5/sep"
top: "conv5_5/sep"
}
layer {
name: "conv5_6/dw"
type: "Convolution"
bottom: "conv5_5/sep"
top: "conv5_6/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_6/dw/bn"
type: "BatchNorm"
bottom: "conv5_6/dw"
top: "conv5_6/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_6/dw/scale"
type: "Scale"
bottom: "conv5_6/dw"
top: "conv5_6/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_6/dw"
type: "ReLU"
bottom: "conv5_6/dw"
top: "conv5_6/dw"
}
layer {
name: "conv5_6/sep"
type: "Convolution"
bottom: "conv5_6/dw"
top: "conv5_6/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_6/sep/bn"
type: "BatchNorm"
bottom: "conv5_6/sep"
top: "conv5_6/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_6/sep/scale"
type: "Scale"
bottom: "conv5_6/sep"
top: "conv5_6/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_6/sep"
type: "ReLU"
bottom: "conv5_6/sep"
top: "conv5_6/sep"
}
layer {
name: "conv6/dw"
type: "Convolution"
bottom: "conv5_6/sep"
top: "conv6/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
bias_term: false
pad: 1
kernel_size: 3
group: 1024
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv6/dw/bn"
type: "BatchNorm"
bottom: "conv6/dw"
top: "conv6/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv6/dw/scale"
type: "Scale"
bottom: "conv6/dw"
top: "conv6/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu6/dw"
type: "ReLU"
bottom: "conv6/dw"
top: "conv6/dw"
}
layer {
name: "conv6/sep"
type: "Convolution"
bottom: "conv6/dw"
top: "conv6/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv6/sep/bn"
type: "BatchNorm"
bottom: "conv6/sep"
top: "conv6/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv6/sep/scale"
type: "Scale"
bottom: "conv6/sep"
top: "conv6/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu6/sep"
type: "ReLU"
bottom: "conv6/sep"
top: "conv6/sep"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv6/sep"
top: "pool6"
pooling_param {
pool: AVE
global_pooling: true
}
}
layer {
name: "fc7"
type: "Convolution"
bottom: "pool6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 42
kernel_size: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc7"
bottom: "label"
top: "loss"
}
layer {
name: "top1/acc"
type: "Accuracy"
bottom: "fc7"
bottom: "label"
top: "top1/acc"
include {
phase: TEST
}
}
layer {
name: "top5/acc"
type: "Accuracy"
bottom: "fc7"
bottom: "label"
top: "top5/acc"
include {
phase: TEST
}
accuracy_param {
top_k: 5
}
}
I tried also to replace the mean as you said in your tutorial but also I get
I0421 13:23:50.226541 2808 solver.cpp:218] Iteration 0 (-1.82169e-44 iter/s, 40.202s/20 iters), loss = -nan
I0421 13:23:50.226686 2808 solver.cpp:237] Train net output #0: loss = -nan (* 1 = -nan loss)
I0421 13:23:50.226697 2808 sgd_solver.cpp:105] Iteration 0, lr = 1e-06
I really don't understand why loss goes to -nan
The text was updated successfully, but these errors were encountered: