fix conflict of merging BVLC#3948
weiliu89 committed Apr 7, 2016
2 parents cf455be + 10f9273 commit b1678f3
Showing 34 changed files with 3,270 additions and 53 deletions.
6 changes: 3 additions & 3 deletions Makefile
@@ -364,9 +364,9 @@ ifeq ($(BLAS), mkl)
# MKL
LIBRARIES += mkl_rt
COMMON_FLAGS += -DUSE_MKL
-MKL_DIR ?= /opt/intel/mkl
-BLAS_INCLUDE ?= $(MKL_DIR)/include
-BLAS_LIB ?= $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64
+MKLROOT ?= /opt/intel/mkl
+BLAS_INCLUDE ?= $(MKLROOT)/include
+BLAS_LIB ?= $(MKLROOT)/lib $(MKLROOT)/lib/intel64
else ifeq ($(BLAS), open)
# OpenBLAS
LIBRARIES += openblas
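An aside on the `-DUSE_MKL` define that this hunk leaves unchanged: Caffe's math wrappers select the BLAS backend at compile time, so the same `cblas_*` calls resolve to MKL or to a reference CBLAS depending on the build. A minimal sketch of that pattern, assuming the standard `mkl.h`/`cblas.h` headers (the `axpy` helper below is illustrative, not code from this commit):

// Sketch: -DUSE_MKL (from COMMON_FLAGS above) picks the BLAS header at build
// time; the cblas_* symbols then come from MKL or from e.g. OpenBLAS/ATLAS.
#ifdef USE_MKL
#include <mkl.h>     // Intel MKL exposes the CBLAS interface
#else
#include <cblas.h>   // reference CBLAS implementation
#endif

// y := alpha * x + y for contiguous vectors, dispatched at compile time.
void axpy(int n, float alpha, const float* x, float* y) {
  cblas_saxpy(n, alpha, x, 1, y, 1);
}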
4 changes: 2 additions & 2 deletions examples/finetune_flickr_style/readme.md
@@ -14,9 +14,9 @@ Let's fine-tune the BVLC-distributed CaffeNet model on a different dataset, [Fli
## Explanation

The Flickr-sourced images of the Style dataset are visually very similar to the ImageNet dataset, on which the `bvlc_reference_caffenet` was trained.
-Since that model works well for object category classification, we'd like to use it architecture for our style classifier.
+Since that model works well for object category classification, we'd like to use this architecture for our style classifier.
We also only have 80,000 images to train on, so we'd like to start with the parameters learned on the 1,000,000 ImageNet images, and fine-tune as needed.
-If we give provide the `weights` argument to the `caffe train` command, the pretrained weights will be loaded into our model, matching layers by name.
+If we provide the `weights` argument to the `caffe train` command, the pretrained weights will be loaded into our model, matching layers by name.

Because we are predicting 20 classes instead of 1,000, we do need to change the last layer in the model.
Therefore, we change the name of the last layer from `fc8` to `fc8_flickr` in our prototxt.
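To make "matching layers by name" concrete, here is a toy C++ sketch of the copy rule. It mirrors what Caffe's `Net::CopyTrainedLayersFrom` does conceptually; the function and the flat-vector weight representation below are illustrative, not Caffe's actual code:

#include <map>
#include <string>
#include <vector>

// Toy model of name-based weight transfer: each layer name maps to a flat
// weight vector. Only names present in both nets (with matching sizes) are
// copied; a renamed layer such as "fc8_flickr" is absent from the pretrained
// map, so it keeps its fresh initialization and is trained from scratch.
void CopyMatchingWeights(
    const std::map<std::string, std::vector<float>>& pretrained,
    std::map<std::string, std::vector<float>>* target) {
  for (auto& layer : *target) {
    auto src = pretrained.find(layer.first);
    if (src != pretrained.end() &&
        src->second.size() == layer.second.size()) {
      layer.second = src->second;  // reuse the ImageNet-learned parameters
    }
  }
}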
7 changes: 4 additions & 3 deletions examples/pascal-multilabel-with-datalayer.ipynb
@@ -452,8 +452,8 @@
}
],
"metadata": {
"description": "Multilabel classification on PASCAL using python data-layers.",
"example_name": "PASCAL Multilabel with python datalayer",
"description": "Multilabel classification on PASCAL VOC using a Python data layer.",
"example_name": "Multilabel Classification with Python Data Layer",
"include_in_docs": true,
"kernelspec": {
"display_name": "Python 2",
@@ -471,7 +471,8 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
-}
+},
+"priority": 5
},
"nbformat": 4,
"nbformat_minor": 0
5 changes: 5 additions & 0 deletions include/caffe/common.hpp
@@ -153,6 +153,11 @@ class Caffe {
static void SetDevice(const int device_id);
// Prints the current GPU status.
static void DeviceQuery();
+// Check if specified device is available
+static bool CheckDevice(const int device_id);
+// Search from start_id to the highest possible device ordinal,
+// return the ordinal of the first available device.
+static int FindDevice(const int start_id = 0);
// Parallel training info
inline static int solver_count() { return Get().solver_count_; }
inline static void set_solver_count(int val) { Get().solver_count_ = val; }
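The two new declarations pair naturally with the existing `SetDevice` just above them. A hedged usage sketch (`SelectGpu` is an illustrative helper, not part of this commit; it assumes `FindDevice` signals failure with a negative return value):

#include "caffe/common.hpp"

// Fall back to the first available GPU when the requested ordinal is unusable.
void SelectGpu(int requested_id) {
  if (!caffe::Caffe::CheckDevice(requested_id)) {
    requested_id = caffe::Caffe::FindDevice();  // scan from ordinal 0
  }
  if (requested_id >= 0) {  // assumption: negative means no device was found
    caffe::Caffe::SetDevice(requested_id);
    caffe::Caffe::set_mode(caffe::Caffe::GPU);
  }
}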
67 changes: 67 additions & 0 deletions include/caffe/layers/crop_layer.hpp
@@ -0,0 +1,67 @@
#ifndef CAFFE_CROP_LAYER_HPP_
#define CAFFE_CROP_LAYER_HPP_

#include <utility>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
* @brief Takes a Blob and crops it to the shape specified by the second input
* Blob, across all dimensions after the specified axis.
*
* TODO(dox): thorough documentation for Forward, Backward, and proto params.
*/

template <typename Dtype>
class CropLayer : public Layer<Dtype> {
public:
explicit CropLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "Crop"; }
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

vector<int> offsets;

private:
void crop_copy(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top,
const vector<int>& offsets,
vector<int> indices,
int cur_dim,
const Dtype* src_data,
Dtype* dest_data,
bool is_forward);

void crop_copy_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top,
const vector<int>& offsets,
vector<int> indices,
int cur_dim,
const Dtype* src_data,
Dtype* dest_data,
bool is_forward);
};
} // namespace caffe

#endif // CAFFE_CROP_LAYER_HPP_
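As a reading aid for the shape rule in the class comment: dimensions before the crop axis are kept from `bottom[0]`, while dimensions from the axis onward are taken from the reference blob `bottom[1]`. A small sketch under those assumptions, using plain vectors instead of Blobs (not the layer's actual code):

#include <vector>

// Given the data shape, the reference shape, and the crop axis, return the
// cropped (top) shape: leading dims are kept, trailing dims come from the
// reference blob.
std::vector<int> CroppedShape(const std::vector<int>& bottom_shape,
                              const std::vector<int>& reference_shape,
                              size_t axis) {
  std::vector<int> top_shape(bottom_shape);
  for (size_t i = axis; i < top_shape.size(); ++i) {
    top_shape[i] = reference_shape[i];  // e.g., axis 2 crops H and W
  }
  return top_shape;
}

For example, with a bottom shape of {1, 3, 227, 227}, a reference shape of {1, 3, 224, 224}, and axis 2, this yields {1, 3, 224, 224}.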
155 changes: 155 additions & 0 deletions include/caffe/layers/lstm_layer.hpp
@@ -0,0 +1,155 @@
#ifndef CAFFE_LSTM_LAYER_HPP_
#define CAFFE_LSTM_LAYER_HPP_

#include <string>
#include <utility>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/recurrent_layer.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

template <typename Dtype> class RecurrentLayer;

/**
* @brief Processes sequential inputs using a "Long Short-Term Memory" (LSTM)
* [1] style recurrent neural network (RNN). Implemented by unrolling
* the LSTM computation through time.
*
* The specific architecture used in this implementation is as described in
* "Learning to Execute" [2], reproduced below:
* i_t := \sigmoid[ W_{hi} * h_{t-1} + W_{xi} * x_t + b_i ]
* f_t := \sigmoid[ W_{hf} * h_{t-1} + W_{xf} * x_t + b_f ]
* o_t := \sigmoid[ W_{ho} * h_{t-1} + W_{xo} * x_t + b_o ]
* g_t := \tanh[ W_{hg} * h_{t-1} + W_{xg} * x_t + b_g ]
* c_t := (f_t .* c_{t-1}) + (i_t .* g_t)
* h_t := o_t .* \tanh[c_t]
* In the implementation, the i, f, o, and g computations are performed as a
* single inner product.
*
* Notably, this implementation lacks the "diagonal" gates, as used in the
* LSTM architectures described by Alex Graves [3] and others.
*
* [1] Hochreiter, Sepp, and Schmidhuber, Jürgen. "Long short-term memory."
* Neural Computation 9, no. 8 (1997): 1735-1780.
*
* [2] Zaremba, Wojciech, and Sutskever, Ilya. "Learning to execute."
* arXiv preprint arXiv:1410.4615 (2014).
*
* [3] Graves, Alex. "Generating sequences with recurrent neural networks."
* arXiv preprint arXiv:1308.0850 (2013).
*/
template <typename Dtype>
class LSTMLayer : public RecurrentLayer<Dtype> {
public:
explicit LSTMLayer(const LayerParameter& param)
: RecurrentLayer<Dtype>(param) {}

virtual inline const char* type() const { return "LSTM"; }

protected:
virtual void FillUnrolledNet(NetParameter* net_param) const;
virtual void RecurrentInputBlobNames(vector<string>* names) const;
virtual void RecurrentOutputBlobNames(vector<string>* names) const;
virtual void RecurrentInputShapes(vector<BlobShape>* shapes) const;
virtual void OutputBlobNames(vector<string>* names) const;
};

/**
* @brief A helper for LSTMLayer: computes a single timestep of the
* non-linearity of the LSTM, producing the updated cell and hidden
* states.
*/
template <typename Dtype>
class LSTMUnitLayer : public Layer<Dtype> {
public:
explicit LSTMUnitLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "LSTMUnit"; }
virtual inline int ExactNumBottomBlobs() const { return 3; }
virtual inline int ExactNumTopBlobs() const { return 2; }

virtual inline bool AllowForceBackward(const int bottom_index) const {
// Can't propagate to sequence continuation indicators.
return bottom_index != 2;
}

protected:
/**
* @param bottom input Blob vector (length 3)
* -# @f$ (1 \times N \times D) @f$
* the previous timestep cell state @f$ c_{t-1} @f$
* -# @f$ (1 \times N \times 4D) @f$
* the "gate inputs" @f$ [i_t', f_t', o_t', g_t'] @f$
* -# @f$ (1 \times N) @f$
* the sequence continuation indicators @f$ \delta_t @f$
* @param top output Blob vector (length 2)
* -# @f$ (1 \times N \times D) @f$
* the updated cell state @f$ c_t @f$, computed as:
* i_t := \sigmoid[i_t']
* f_t := \sigmoid[f_t']
* o_t := \sigmoid[o_t']
* g_t := \tanh[g_t']
* c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
* -# @f$ (1 \times N \times D) @f$
* the updated hidden state @f$ h_t @f$, computed as:
* h_t := o_t .* \tanh[c_t]
*/
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

/**
* @brief Computes the error gradient w.r.t. the LSTMUnit inputs.
*
* @param top output Blob vector (length 2), providing the error gradient with
* respect to the outputs
* -# @f$ (1 \times N \times D) @f$:
* containing error gradients @f$ \frac{\partial E}{\partial c_t} @f$
* with respect to the updated cell state @f$ c_t @f$
* -# @f$ (1 \times N \times D) @f$:
* containing error gradients @f$ \frac{\partial E}{\partial h_t} @f$
with respect to the updated hidden state @f$ h_t @f$
* @param propagate_down see Layer::Backward.
* @param bottom input Blob vector (length 3), into which the error gradients
* with respect to the LSTMUnit inputs @f$ c_{t-1} @f$ and the gate
inputs are computed. Computation of the error gradients w.r.t.
* the sequence indicators is not implemented.
* -# @f$ (1 \times N \times D) @f$
* the error gradient w.r.t. the previous timestep cell state
* @f$ c_{t-1} @f$
* -# @f$ (1 \times N \times 4D) @f$
* the error gradient w.r.t. the "gate inputs"
* @f$ [
* \frac{\partial E}{\partial i_t}
* \frac{\partial E}{\partial f_t}
* \frac{\partial E}{\partial o_t}
* \frac{\partial E}{\partial g_t}
* ] @f$
* -# @f$ (1 \times 1 \times N) @f$
* the gradient w.r.t. the sequence continuation indicators
* @f$ \delta_t @f$ is currently not computed.
*/
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

/// @brief The hidden and output dimension.
int hidden_dim_;
Blob<Dtype> X_acts_;
};

} // namespace caffe

#endif // CAFFE_LSTM_LAYER_HPP_
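To connect the LSTMUnit equations above to running code, here is a scalar sketch of one timestep for a single sequence element, with the gate inputs packed as [i', f', o', g'] exactly as in the (1 x N x 4D) bottom blob. This is a sketch of the math only, not Caffe's blob-based implementation:

#include <cmath>
#include <vector>

// One LSTMUnit timestep for a single element (N = 1), following the header
// doc: c_t := cont * (f .* c_{t-1}) + (i .* g), h_t := o .* tanh(c_t).
void LstmUnitStep(const std::vector<float>& c_prev,  // c_{t-1}, size D
                  const std::vector<float>& gates,   // [i', f', o', g'], 4D
                  float cont,                        // sequence indicator
                  std::vector<float>* c, std::vector<float>* h) {
  const size_t D = c_prev.size();
  c->resize(D);
  h->resize(D);
  for (size_t d = 0; d < D; ++d) {
    const float i = 1.0f / (1.0f + std::exp(-gates[d]));          // i_t
    const float f = 1.0f / (1.0f + std::exp(-gates[D + d]));      // f_t
    const float o = 1.0f / (1.0f + std::exp(-gates[2 * D + d]));  // o_t
    const float g = std::tanh(gates[3 * D + d]);                  // g_t
    (*c)[d] = cont * f * c_prev[d] + i * g;  // cell update, gated by cont
    (*h)[d] = o * std::tanh((*c)[d]);        // hidden state output
  }
}

Setting cont to 0 at the start of a sequence zeroes the carried-over cell contribution, which is why gradients cannot propagate to the continuation indicators.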