diff --git a/.gitignore b/.gitignore index 73bba6cb364..28f2aca854b 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ # QtCreator files *.user +# PyCharm files +.idea + # OSX dir files .DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index adea37be565..54b044d347b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ caffe_option(CPU_ONLY "Build Caffe wihtout CUDA support" OFF) # TODO: rename to caffe_option(USE_CUDNN "Build Caffe with cuDNN libary support" ON IF NOT CPU_ONLY) caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON) caffe_option(BUILD_python "Build Python wrapper" ON) +set(python_version "2" CACHE STRING "Specify which python version to use") caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE) caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE) @@ -24,7 +25,7 @@ caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE) include(cmake/Dependencies.cmake) # ---[ Flags -if(UNIX OR APLE) +if(UNIX OR APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall") endif() diff --git a/Makefile b/Makefile index 29827270baf..cb396794e37 100644 --- a/Makefile +++ b/Makefile @@ -261,7 +261,8 @@ ifneq (,$(findstring clang++,$(CXX))) else ifneq (,$(findstring g++,$(CXX))) STATIC_LINK_COMMAND := -Wl,--whole-archive $(STATIC_NAME) -Wl,--no-whole-archive else - $(error Cannot static link with the $(CXX) compiler.) 
+ # The following line must not be indented with a tab, since we are not inside a target + $(error Cannot static link with the $(CXX) compiler) endif # Debugging @@ -319,7 +320,7 @@ else # 10.10 has accelerate while 10.9 has veclib XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep -o 'version: 6') ifneq (,$(findstring version: 6,$(XCODE_CLT_VER))) - BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.10.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ + BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ LDFLAGS += -framework Accelerate else BLAS_INCLUDE ?= /System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/ @@ -450,6 +451,7 @@ $(MAT$(PROJECT)_SO): $(MAT$(PROJECT)_SRC) $(STATIC_NAME) CXXLIBS="\$$CXXLIBS $(STATIC_LINK_COMMAND) $(LDFLAGS)" -output $@ runtest: $(TEST_ALL_BIN) + $(TOOL_BUILD_DIR)/caffe $(TEST_ALL_BIN) $(TEST_GPUID) --gtest_shuffle $(TEST_FILTER) pytest: py @@ -537,7 +539,12 @@ $(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/%.bin | $(TOOL_BUILD_DIR) @ $(RM) $@ @ ln -s $(abspath $<) $@ -$(TOOL_BINS) $(EXAMPLE_BINS): %.bin : %.o | $(DYNAMIC_NAME) +$(TOOL_BINS): %.bin : %.o | $(DYNAMIC_NAME) + @ echo CXX/LD -o $@ + $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \ + -Wl,-rpath,$(ORIGIN)/../lib + +$(EXAMPLE_BINS): %.bin : %.o | $(DYNAMIC_NAME) @ echo CXX/LD -o $@ $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \ -Wl,-rpath,$(ORIGIN)/../../lib diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index aa2dcbe1d0d..b1ac96c6777 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -92,12 +92,39 @@ endif() # ---[ Python if(BUILD_python) - # disable Python 3 search - find_package(PythonInterp 2.7) - find_package(PythonLibs 2.7) - find_package(NumPy 1.7.1) - find_package(Boost 1.46 COMPONENTS 
python) - + if(NOT "${python_version}" VERSION_LESS "3.0.0") + # use python3 + find_package(PythonInterp 3.0) + find_package(PythonLibs 3.0) + find_package(NumPy 1.7.1) + # Find the matching boost python implementation + set(version ${PYTHONLIBS_VERSION_STRING}) + + STRING( REPLACE "." "" boost_py_version ${version} ) + find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}") + set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND}) + + while(NOT "${version}" STREQUAL "" AND NOT Boost_PYTHON_FOUND) + STRING( REGEX REPLACE "([0-9.]+).[0-9]+" "\\1" version ${version} ) + STRING( REGEX MATCHALL "([0-9.]+).[0-9]+" has_more_version ${version} ) + if("${has_more_version}" STREQUAL "") + break() + endif() + + STRING( REPLACE "." "" boost_py_version ${version} ) + find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}") + set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND}) + endwhile() + if(NOT Boost_PYTHON_FOUND) + find_package(Boost 1.46 COMPONENTS python) + endif() + else() + # disable Python 3 search + find_package(PythonInterp 2.7) + find_package(PythonLibs 2.7) + find_package(NumPy 1.7.1) + find_package(Boost 1.46 COMPONENTS python) + endif() if(PYTHONLIBS_FOUND AND NUMPY_FOUND AND Boost_PYTHON_FOUND) set(HAVE_PYTHON TRUE) endif() diff --git a/cmake/Misc.cmake b/cmake/Misc.cmake index 608a5f13a79..39569eaf996 100644 --- a/cmake/Misc.cmake +++ b/cmake/Misc.cmake @@ -32,6 +32,11 @@ endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOLEAN "Use link paths for shared library rpath") set(CMAKE_MACOSX_RPATH TRUE) +list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES ${CMAKE_INSTALL_PREFIX}/lib __is_systtem_dir) +if(${__is_systtem_dir} STREQUAL -1) + set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib) +endif() + # ---[ Funny target if(UNIX OR APPLE) add_custom_target(symlink_to_build COMMAND "ln" "-sf" "${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/build" diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 
3f7dff6b6e0..32931942846 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -107,8 +107,9 @@ function(caffe_print_configuration_summary) caffe_status(" C++ compiler : ${CMAKE_CXX_COMPILER}") caffe_status(" Release CXX flags : ${__flags_rel}") caffe_status(" Debug CXX flags : ${__flags_deb}") - caffe_status(" BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}") caffe_status(" Build type : ${CMAKE_BUILD_TYPE}") + caffe_status("") + caffe_status(" BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}") caffe_status(" BUILD_python : ${BUILD_python}") caffe_status(" BUILD_matlab : ${BUILD_matlab}") caffe_status(" BUILD_docs : ${BUILD_docs}") @@ -116,8 +117,9 @@ function(caffe_print_configuration_summary) caffe_status("") caffe_status("Dependencies:") caffe_status(" BLAS : " APPLE THEN "Yes (vecLib)" ELSE "Yes (${BLAS})") + caffe_status(" Boost : Yes (ver. ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION})") caffe_status(" glog : Yes") - caffe_status(" gflags : Yes") + caffe_status(" gflags : Yes") caffe_status(" protobuf : " PROTOBUF_FOUND THEN "Yes (ver. ${PROTOBUF_VERSION})" ELSE "No" ) caffe_status(" lmdb : " LMDB_FOUND THEN "Yes (ver. ${LMDB_VERSION})" ELSE "No") caffe_status(" Snappy : " SNAPPY_FOUND THEN "Yes (ver. ${Snappy_VERSION})" ELSE "No" ) diff --git a/data/coco/README.md b/data/coco/README.md new file mode 100644 index 00000000000..53d36707f2e --- /dev/null +++ b/data/coco/README.md @@ -0,0 +1,24 @@ +For details about the Microsoft COCO ("Common Objects in Context") dataset [1], +visit mscoco.org. This README provides instructions for downloading and +installing the tools and dataset. + +1) Download and extract the COCO Python tools by running: + + ./download_tools.sh + +2) Install the tools, and optionally download the data by running: + + cd tools + python setup.py install # follow prompts to download or skip data + +3) Download train/val/test splits using: + + ./get_coco2014_aux.sh + +(or see the COCO README (tools/README) for more information). 
+ + +[1] Lin, Tsung-Yi, Michael Maire, Serge Belongie, James Hays, Pietro Perona, + Deva Ramanan, Piotr Dollár, and C. Lawrence Zitnick. + "Microsoft COCO: Common Objects in Context." + arXiv preprint arXiv:1405.0312 (2014). diff --git a/data/coco/download_tools.sh b/data/coco/download_tools.sh new file mode 100755 index 00000000000..c90bc1a2624 --- /dev/null +++ b/data/coco/download_tools.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +wget http://msvocds.blob.core.windows.net/annotations-0-9/tools.zip +unzip tools.zip +rm tools.zip diff --git a/data/coco/get_coco2014_aux.sh b/data/coco/get_coco2014_aux.sh new file mode 100755 index 00000000000..91cf3e1ef0d --- /dev/null +++ b/data/coco/get_coco2014_aux.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env sh +# +# Downloads Andrej Karpathy's train/val/test splits of COCO2014 as text files. + +echo "Downloading..." + +wget http://dl.caffe.berkeleyvision.org/coco2014_aux.tar.gz + +echo "Unzipping..." + +tar -xf coco2014_aux.tar.gz && rm -f coco2014_aux.tar.gz + +echo "Done." diff --git a/docs/installation.md b/docs/installation.md index 16575b54029..144e6a34f67 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -30,7 +30,7 @@ Caffe has several dependencies. Pycaffe and Matcaffe interfaces have their own natural needs. -* For Python Caffe: `Python 2.7`, `numpy (>= 1.7)`, boost-provided `boost.python` +* For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python` * For MATLAB Caffe: MATLAB with the `mex` compiler. **cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. For now cuDNN v1 is integrated but see [PR #1731](https://github.com/BVLC/caffe/pull/1731) for v2. 
@@ -69,7 +69,7 @@ but we suggest first installing the [Anaconda](https://store.continuum.io/cshop/ To import the `caffe` Python module after completing the installation, add the module directory to your `$PYTHONPATH` by `export PYTHONPATH=/path/to/caffe/python:$PYTHONPATH` or the like. You should not import the module in the `caffe/python/caffe` directory! -*Caffe's Python interface works with Python 2.7. Python 3 or earlier Pythons are your own adventure.* +*Caffe's Python interface works with Python 2.7. Python 3.3+ should work out of the box without protobuf support. For protobuf support please install protobuf 3.0 alpha (https://developers.google.com/protocol-buffers/). Earlier Pythons are your own adventure.* #### MATLAB diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 34bb48050e8..839939f5ad6 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -453,20 +453,20 @@ The `SLICE` layer is a utility layer that slices an input layer to multiple outp * Sample - layers { - name: "slicer_label" - type: SLICE - bottom: "label" - ## Example of label with a shape N x 3 x 1 x 1 - top: "label1" - top: "label2" - top: "label3" - slice_param { - slice_dim: 1 - slice_point: 1 - slice_point: 2 - } - } + layers { + name: "slicer_label" + type: SLICE + bottom: "label" + ## Example of label with a shape N x 3 x 1 x 1 + top: "label1" + top: "label2" + top: "label3" + slice_param { + slice_dim: 1 + slice_point: 1 + slice_point: 2 + } + } `slice_dim` indicates the target dimension and can assume only two values: 0 for num or 1 for channel; `slice_point` indicates indexes in the selected dimension (the number of indexes must be equal to the number of top blobs minus one). 
diff --git a/examples/coco_caption/.gitignore b/examples/coco_caption/.gitignore new file mode 100644 index 00000000000..e040331b7f2 --- /dev/null +++ b/examples/coco_caption/.gitignore @@ -0,0 +1 @@ +h5_data/ diff --git a/examples/coco_caption/coco_to_hdf5_data.py b/examples/coco_caption/coco_to_hdf5_data.py new file mode 100755 index 00000000000..233ee829078 --- /dev/null +++ b/examples/coco_caption/coco_to_hdf5_data.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python + +from hashlib import sha1 +import os +import random +random.seed(3) +import re +import sys + +sys.path.append('./examples/coco_caption/') + +COCO_PATH = './data/coco/tools' +COCO_TOOL_PATH = '%s/pycocotools' % COCO_PATH + +MAX_HASH = 100000 + +sys.path.append(COCO_TOOL_PATH) +from coco import COCO + +from hdf5_sequence_generator import SequenceGenerator, HDF5SequenceWriter + +# UNK_IDENTIFIER is the word used to identify unknown words +UNK_IDENTIFIER = '' + +SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)') +def split_sentence(sentence): + # break sentence into a list of words and punctuation + sentence = [s.lower() for s in SENTENCE_SPLIT_REGEX.split(sentence.strip()) if len(s.strip()) > 0] + # remove the '.' 
from the end of the sentence + if sentence[-1] != '.': + # print "Warning: sentence doesn't end with '.'; ends with: %s" % sentence[-1] + return sentence + return sentence[:-1] + +MAX_WORDS = 20 + +class CocoSequenceGenerator(SequenceGenerator): + def __init__(self, coco, batch_num_streams, vocab=None, + max_words=MAX_WORDS, align=True, shuffle=True, gt_captions=True, + pad=True, truncate=True, split_ids=None): + self.max_words = max_words + num_empty_lines = 0 + self.images = [] + num_total = 0 + num_missing = 0 + num_captions = 0 + known_images = {} + image_root = '%s/%s' % (COCO_PATH, coco.image_folder) + if split_ids is None: + split_ids = coco.images.keys() + for image_id in split_ids: + image_info = coco.images[image_id] + image_path = '%s/%s/%s' % \ + (image_root, image_info['file_path'], image_info['file_name']) + if os.path.isfile(image_path): + assert image_id not in known_images # no duplicates allowed + known_images[image_id] = {} + known_images[image_id]['path'] = image_path + if gt_captions: + known_images[image_id]['sentences'] = [split_sentence(anno['sentence']) + for anno in coco.image_to_annotations[image_id]] + num_captions += len(known_images[image_id]['sentences']) + else: + known_images[image_id]['sentences'] = [] + else: + num_missing += 1 + print 'Warning (#%d): image not found: %s' % (num_missing, image_path) + num_total += 1 + print '%d/%d images missing' % (num_missing, num_total) + if vocab is None: + self.init_vocabulary(known_images) + else: + self.vocabulary_inverted = vocab + self.vocabulary = {} + for index, word in enumerate(self.vocabulary_inverted): + self.vocabulary[word] = index + self.image_sentence_pairs = [] + num_no_sentences = 0 + for image_filename, metadata in known_images.iteritems(): + if not metadata['sentences']: + num_no_sentences += 1 + print 'Warning (#%d): image with no sentences: %s' % (num_no_sentences, image_filename) + for sentence in metadata['sentences']: + 
self.image_sentence_pairs.append((metadata['path'], sentence)) + self.index = 0 + self.num_resets = 0 + self.num_truncates = 0 + self.num_pads = 0 + self.num_outs = 0 + self.image_list = [] + SequenceGenerator.__init__(self) + self.batch_num_streams = batch_num_streams + # make the number of image/sentence pairs a multiple of the buffer size + # so each timestep of each batch is useful and we can align the images + if align: + num_pairs = len(self.image_sentence_pairs) + remainder = num_pairs % batch_num_streams + if remainder > 0: + num_needed = batch_num_streams - remainder + for i in range(num_needed): + choice = random.randint(0, num_pairs - 1) + self.image_sentence_pairs.append(self.image_sentence_pairs[choice]) + assert len(self.image_sentence_pairs) % batch_num_streams == 0 + if shuffle: + random.shuffle(self.image_sentence_pairs) + self.pad = pad + self.truncate = truncate + self.negative_one_padded_streams = frozenset(('input_sentence', 'target_sentence')) + + def streams_exhausted(self): + return self.num_resets > 0 + + def init_vocabulary(self, image_annotations, min_count=5): + words_to_count = {} + for image_id, annotations in image_annotations.iteritems(): + for annotation in annotations['sentences']: + for word in annotation: + word = word.strip() + if word not in words_to_count: + words_to_count[word] = 0 + words_to_count[word] += 1 + # Sort words by count, then alphabetically + words_by_count = sorted(words_to_count.keys(), key=lambda w: (-words_to_count[w], w)) + print 'Initialized vocabulary with %d words; top 10 words:' % len(words_by_count) + for word in words_by_count[:10]: + print '\t%s (%d)' % (word, words_to_count[word]) + # Add words to vocabulary + self.vocabulary = {UNK_IDENTIFIER: 0} + self.vocabulary_inverted = [UNK_IDENTIFIER] + for index, word in enumerate(words_by_count): + word = word.strip() + if words_to_count[word] < min_count: + break + self.vocabulary_inverted.append(word) + self.vocabulary[word] = index + 1 + print 'Final 
vocabulary (restricted to words with counts of %d+) has %d words' % \ + (min_count, len(self.vocabulary)) + + def dump_vocabulary(self, vocab_filename): + print 'Dumping vocabulary to file: %s' % vocab_filename + with open(vocab_filename, 'wb') as vocab_file: + for word in self.vocabulary_inverted: + vocab_file.write('%s\n' % word) + print 'Done.' + + def dump_image_file(self, image_filename, dummy_image_filename=None): + print 'Dumping image list to file: %s' % image_filename + with open(image_filename, 'wb') as image_file: + for image_path, _ in self.image_list: + image_file.write('%s\n' % image_path) + if dummy_image_filename is not None: + print 'Dumping image list with dummy labels to file: %s' % dummy_image_filename + with open(dummy_image_filename, 'wb') as image_file: + for path_and_hash in self.image_list: + image_file.write('%s %d\n' % path_and_hash) + print 'Done.' + + def next_line(self): + num_lines = float(len(self.image_sentence_pairs)) + self.index += 1 + if self.index == 1 or self.index == num_lines or self.index % 10000 == 0: + print 'Processed %d/%d (%f%%) lines' % (self.index, num_lines, + 100 * self.index / num_lines) + if self.index == num_lines: + self.index = 0 + self.num_resets += 1 + + def line_to_stream(self, sentence): + stream = [] + for word in sentence: + word = word.strip() + if word in self.vocabulary: + stream.append(self.vocabulary[word]) + else: # unknown word; append UNK + stream.append(self.vocabulary[UNK_IDENTIFIER]) + # increment the stream -- 0 will be the EOS character + stream = [s + 1 for s in stream] + return stream + + def get_pad_value(self, stream_name): + return -1 if stream_name in self.negative_one_padded_streams else 0 + + def get_streams(self): + image_filename, line = self.image_sentence_pairs[self.index] + stream = self.line_to_stream(line) + pad = self.max_words - (len(stream) + 1) if self.pad else 0 + if pad > 0: self.num_pads += 1 + self.num_outs += 1 + out = {} + out['stage_indicators'] = [1] * (len(stream) 
+ 1) + [0] * pad + out['cont_sentence'] = [0] + [1] * len(stream) + [0] * pad + out['input_sentence'] = [0] + stream + [-1] * pad + out['target_sentence'] = stream + [0] + [-1] * pad + truncated = False + if self.truncate: + for key, val in out.iteritems(): + if len(val) > self.max_words: + out[key] = val[:self.max_words] + truncated = True + self.num_truncates += truncated + image_hash = self.image_hash(image_filename) + out['hashed_image_path'] = [image_hash] * len(out['input_sentence']) + self.image_list.append((image_filename, image_hash)) + self.next_line() + return out + + def image_hash(self, filename): + image_hash = int(sha1(filename).hexdigest(), 16) % MAX_HASH + assert image_hash == float(image_hash) + return image_hash + +COCO_ANNO_PATH = '%s/annotations/sentences_%%s2014.json' % COCO_PATH +COCO_IMAGE_PATTERN = '%s/images/%%s2014' % COCO_PATH +COCO_IMAGE_ID_PATTERN = 'COCO_%s2014_%%012d.jpg' + +BUFFER_SIZE = 100 +OUTPUT_DIR = './examples/coco_caption/h5_data/buffer_%d' % BUFFER_SIZE +SPLITS_PATTERN = './data/coco/coco2014_cocoid.%s.txt' +OUTPUT_DIR_PATTERN = '%s/%%s_batches' % OUTPUT_DIR + +def preprocess_dataset(split_name, coco_split_name, batch_stream_length, + vocab=None, aligned=True): + with open(SPLITS_PATTERN % split_name, 'r') as split_file: + split_image_ids = [int(line) for line in split_file.readlines()] + output_dataset_name = split_name + if aligned: + output_dataset_name += '_aligned_%d' % MAX_WORDS + else: + output_dataset_name += '_unaligned' + output_path = OUTPUT_DIR_PATTERN % output_dataset_name + coco = COCO(COCO_ANNO_PATH % coco_split_name) + sg = CocoSequenceGenerator(coco, BUFFER_SIZE, split_ids=split_image_ids, + vocab=vocab, align=aligned, pad=aligned, truncate=aligned) + sg.batch_stream_length = batch_stream_length + writer = HDF5SequenceWriter(sg, output_dir=output_path) + writer.write_to_exhaustion() + writer.write_filelists() + if vocab is None: + vocab_out_path = '%s/vocabulary.txt' % OUTPUT_DIR + 
sg.dump_vocabulary(vocab_out_path) + image_out_path = '%s/image_list.txt' % output_path + image_dummy_labels_out_path = '%s/image_list.with_dummy_labels.txt' % output_path + sg.dump_image_file(image_out_path, image_dummy_labels_out_path) + num_outs = sg.num_outs + num_pads = sg.num_pads + num_truncates = sg.num_truncates + print 'Padded %d/%d sequences; truncated %d/%d sequences' % \ + (num_pads, num_outs, num_truncates, num_outs) + return sg.vocabulary_inverted + +def preprocess_coco(): + vocab = None + DATASETS = [ + ('train', 'train', 100000, True), + ('val', 'val', 100000, True), + ('test', 'val', 100000, True), + # Write unaligned datasets as well: + ('train', 'train', 100000, False), + ('val', 'val', 100000, False), + ('test', 'val', 100000, False), + ] + for split_name, coco_split_name, batch_stream_length, aligned in DATASETS: + vocab = preprocess_dataset(split_name, coco_split_name, batch_stream_length, + vocab=vocab, aligned=aligned) + +if __name__ == "__main__": + preprocess_coco() diff --git a/examples/coco_caption/hdf5_sequence_generator.py b/examples/coco_caption/hdf5_sequence_generator.py new file mode 100644 index 00000000000..98d4657b6bf --- /dev/null +++ b/examples/coco_caption/hdf5_sequence_generator.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python + +import h5py +import numpy as np +import os +import random +import sys + +class SequenceGenerator(): + def __init__(self): + self.dimension = 10 + self.batch_stream_length = 2000 + self.batch_num_streams = 8 + self.min_stream_length = 13 + self.max_stream_length = 17 + self.substream_names = None + self.streams_initialized = False + + def streams_exhausted(self): + return False + + def init_streams(self): + self.streams = [None] * self.batch_num_streams + self.stream_indices = [0] * self.batch_num_streams + self.reset_stream(0) + self.streams_initialized = True + + def reset_stream(self, stream_index): + streams = self.get_streams() + stream_names = sorted(streams.keys()) + if self.substream_names is 
None: + assert len(stream_names) > 0 + self.substream_names = stream_names + assert self.substream_names == stream_names + if self.streams[stream_index] is None: + self.streams[stream_index] = {} + stream_length = len(streams[stream_names[0]]) + for k, v in streams.iteritems(): + assert stream_length == len(v) + self.streams[stream_index][k] = v + self.stream_indices[stream_index] = 0 + + # Pad with zeroes by default -- override this to pad with soemthing else + # for a particular stream + def get_pad_value(self, stream_name): + return 0 + + def get_next_batch(self, truncate_at_exhaustion=True): + if not self.streams_initialized: + self.init_streams() + batch_size = self.batch_num_streams * self.batch_stream_length + batch = {} + batch_indicators = np.zeros((self.batch_stream_length, self.batch_num_streams)) + for name in self.substream_names: + batch[name] = self.get_pad_value(name) * np.ones_like(batch_indicators) + exhausted = [False] * self.batch_num_streams + all_exhausted = False + reached_exhaustion = False + num_completed_streams = 0 + for t in range(self.batch_stream_length): + all_exhausted = True + for i in range(self.batch_num_streams): + if not exhausted[i]: + if self.streams[i] is None or \ + self.stream_indices[i] == len(self.streams[i][self.substream_names[0]]): + self.stream_indices[i] = 0 + reached_exhaustion = reached_exhaustion or self.streams_exhausted() + if reached_exhaustion: exhausted[i] = True + if not reached_exhaustion or not truncate_at_exhaustion: + self.reset_stream(i) + else: + continue + for name in self.substream_names: + batch[name][t, i] = self.streams[i][name][self.stream_indices[i]] + batch_indicators[t, i] = 0 if self.stream_indices[i] == 0 else 1 + self.stream_indices[i] += 1 + if self.stream_indices[i] == len(self.streams[i][self.substream_names[0]]): + num_completed_streams += 1 + if not exhausted[i]: all_exhausted = False + if all_exhausted and truncate_at_exhaustion: + print ('Exhausted all data; cutting off batch at 
timestep %d ' + + 'with %d streams completed') % (t, num_completed_streams) + for name in self.substream_names: + batch[name] = batch[name][:t, :] + batch_indicators = batch_indicators[:t, :] + break + return batch, batch_indicators + + def get_streams(self): + raise Exception('get_streams should be overridden to return a dict ' + + 'of equal-length iterables.') + +class HDF5SequenceWriter(): + def __init__(self, sequence_generator, output_dir=None, verbose=False): + self.generator = sequence_generator + assert output_dir is not None # required + self.output_dir = output_dir + if os.path.exists(output_dir): + raise Exception('Output directory already exists: ' + output_dir) + os.makedirs(output_dir) + self.verbose = verbose + self.filenames = [] + + def write_batch(self, stop_at_exhaustion=False): + batch_comps, cont_indicators = self.generator.get_next_batch() + batch_index = len(self.filenames) + filename = '%s/batch_%d.h5' % (self.output_dir, batch_index) + self.filenames.append(filename) + h5file = h5py.File(filename, 'w') + dataset = h5file.create_dataset('cont', shape=cont_indicators.shape, dtype=cont_indicators.dtype) + dataset[:] = cont_indicators + dataset = h5file.create_dataset('buffer_size', shape=(1,), dtype=np.int) + dataset[:] = self.generator.batch_num_streams + for key, batch in batch_comps.iteritems(): + if self.verbose: + for s in range(self.generator.batch_num_streams): + stream = np.array(self.generator.streams[s][key]) + print 'batch %d, stream %s, index %d: ' % (batch_index, key, s), stream + h5dataset = h5file.create_dataset(key, shape=batch.shape, dtype=batch.dtype) + h5dataset[:] = batch + h5file.close() + + def write_to_exhaustion(self): + while not self.generator.streams_exhausted(): + self.write_batch(stop_at_exhaustion=True) + + def write_filelists(self): + assert self.filenames is not None + filelist_filename = '%s/hdf5_chunk_list.txt' % self.output_dir + with open(filelist_filename, 'w') as listfile: + for filename in self.filenames: 
+ listfile.write('%s\n' % filename) diff --git a/examples/coco_caption/lrcn.prototxt b/examples/coco_caption/lrcn.prototxt new file mode 100644 index 00000000000..62d08a2738d --- /dev/null +++ b/examples/coco_caption/lrcn.prototxt @@ -0,0 +1,767 @@ +# The network is used for the image captioning experiments of LRCN [1]. +# Please consider citing LRCN [1] if you use this example in your work. +# +# [1] J. Donahue, L. A. Hendricks, S. Guadarrama, M. Rohrbach, S. Venugopalan, +# K. Saenko, T. Darrell. "Long-term Recurrent Convolutional Networks for +# Visual Recognition and Description." arXiv preprint arXiv:1411.4389 (2014). + +name: "lrcn_caffenet_to_lstm" +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { phase: TRAIN } + transform_param { + mirror: true + crop_size: 227 + mean_value: 104 + mean_value: 117 + mean_value: 123 + } + image_data_param { + source: "./examples/coco_caption/h5_data/buffer_100/train_aligned_20_batches/image_list.with_dummy_labels.txt" + batch_size: 100 + new_height: 256 + new_width: 256 + } +} +layer { + name: "data" + type: "HDF5Data" + top: "cont_sentence" + top: "input_sentence" + top: "target_sentence" + include { phase: TRAIN } + hdf5_data_param { + source: "./examples/coco_caption/h5_data/buffer_100/train_aligned_20_batches/hdf5_chunk_list.txt" + batch_size: 20 + } +} +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { + phase: TEST + stage: "test-on-train" + } + transform_param { + mirror: true + crop_size: 227 + mean_value: 104 + mean_value: 117 + mean_value: 123 + } + image_data_param { + source: "./examples/coco_caption/h5_data/buffer_100/train_aligned_20_batches/image_list.with_dummy_labels.txt" + batch_size: 100 + new_height: 256 + new_width: 256 + } +} +layer { + name: "data" + type: "HDF5Data" + top: "cont_sentence" + top: "input_sentence" + top: "target_sentence" + include { + phase: TEST + stage: "test-on-train" + } + hdf5_data_param { + source: 
"./examples/coco_caption/h5_data/buffer_100/train_aligned_20_batches/hdf5_chunk_list.txt" + batch_size: 20 + } +} +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { + phase: TEST + stage: "test-on-val" + } + transform_param { + mirror: true + crop_size: 227 + mean_value: 104 + mean_value: 117 + mean_value: 123 + } + image_data_param { + source: "./examples/coco_caption/h5_data/buffer_100/val_aligned_20_batches/image_list.with_dummy_labels.txt" + batch_size: 100 + new_height: 256 + new_width: 256 + } +} +layer { + name: "data" + type: "HDF5Data" + top: "cont_sentence" + top: "input_sentence" + top: "target_sentence" + include { + phase: TEST + stage: "test-on-val" + } + hdf5_data_param { + source: "./examples/coco_caption/h5_data/buffer_100/val_aligned_20_batches/hdf5_chunk_list.txt" + batch_size: 20 + } +} +layer { + name: "silence" + type: "Silence" + bottom: "label" +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + include { stage: "freeze-convnet" } + convolution_param { + num_output: 96 + kernel_size: 11 + stride: 4 + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 0.1 + decay_mult: 1 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + exclude { stage: "freeze-convnet" } + convolution_param { + num_output: 96 + kernel_size: 11 + stride: 4 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "norm1" + type: "LRN" + bottom: "pool1" + top: "norm1" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "norm1" + top: "conv2" + param { + 
lr_mult: 0 + } + param { + lr_mult: 0 + } + include { stage: "freeze-convnet" } + convolution_param { + num_output: 256 + pad: 2 + kernel_size: 5 + group: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "norm1" + top: "conv2" + param { + lr_mult: 0.1 + decay_mult: 1 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + exclude { stage: "freeze-convnet" } + convolution_param { + num_output: 256 + pad: 2 + kernel_size: 5 + group: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "norm2" + type: "LRN" + bottom: "pool2" + top: "norm2" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "norm2" + top: "conv3" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + include { stage: "freeze-convnet" } + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "norm2" + top: "conv3" + param { + lr_mult: 0.1 + decay_mult: 1 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + exclude { stage: "freeze-convnet" } + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "conv4" + type: "Convolution" + bottom: "conv3" + top: "conv4" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + include { stage: "freeze-convnet" } + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + group: 2 + } +} +layer { + name: "conv4" + type: "Convolution" + bottom: "conv3" + top: "conv4" + param { + lr_mult: 0.1 + decay_mult: 1 + 
} + param { + lr_mult: 0.2 + decay_mult: 0 + } + exclude { stage: "freeze-convnet" } + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + group: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layer { + name: "relu4" + type: "ReLU" + bottom: "conv4" + top: "conv4" +} +layer { + name: "conv5" + type: "Convolution" + bottom: "conv4" + top: "conv5" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + include { stage: "freeze-convnet" } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + group: 2 + } +} +layer { + name: "conv5" + type: "Convolution" + bottom: "conv4" + top: "conv5" + param { + lr_mult: 0.1 + decay_mult: 1 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + exclude { stage: "freeze-convnet" } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + group: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layer { + name: "relu5" + type: "ReLU" + bottom: "conv5" + top: "conv5" +} +layer { + name: "pool5" + type: "Pooling" + bottom: "conv5" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + include { stage: "freeze-convnet" } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 0.1 + decay_mult: 1 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + exclude { stage: "freeze-convnet" } + inner_product_param { + num_output: 4096 + weight_filler { + type: "gaussian" + std: 0.005 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + name: "drop6" + type: "Dropout" + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + 
name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + include { stage: "freeze-convnet" } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 0.1 + decay_mult: 1 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + exclude { stage: "freeze-convnet" } + inner_product_param { + num_output: 4096 + weight_filler { + type: "gaussian" + std: 0.005 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "drop7" + type: "Dropout" + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "fc8" + type: "InnerProduct" + bottom: "fc7" + top: "fc8" + param { + lr_mult: 0.1 + decay_mult: 1 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + # exclude { stage: "freeze-convnet" } + inner_product_param { + num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "embedding" + type: "Embed" + bottom: "input_sentence" + top: "embedded_input_sentence" + param { + lr_mult: 1 + } + embed_param { + bias_term: false + input_dim: 8801 + num_output: 1000 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + } +} +layer { + name: "lstm1" + type: "LSTM" + bottom: "embedded_input_sentence" + bottom: "cont_sentence" + bottom: "fc8" + top: "lstm1" + include { stage: "unfactored" } + recurrent_param { + num_output: 1000 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "lstm2" + type: "LSTM" + bottom: "lstm1" + bottom: "cont_sentence" + top: "lstm2" + include { + stage: "unfactored" + stage: "2-layer" + } + recurrent_param { + num_output: 1000 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + bias_filler { + type: 
"constant" + value: 0 + } + } +} +layer { + name: "lstm1" + type: "LSTM" + bottom: "embedded_input_sentence" + bottom: "cont_sentence" + top: "lstm1" + include { stage: "factored" } + recurrent_param { + num_output: 1000 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "lstm2" + type: "LSTM" + bottom: "lstm1" + bottom: "cont_sentence" + bottom: "fc8" + top: "lstm2" + include { stage: "factored" } + recurrent_param { + num_output: 1000 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "predict" + type: "InnerProduct" + bottom: "lstm1" + top: "predict" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + exclude { stage: "2-layer" } + inner_product_param { + num_output: 8801 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + bias_filler { + type: "constant" + value: 0 + } + axis: 2 + } +} +layer { + name: "predict" + type: "InnerProduct" + bottom: "lstm2" + top: "predict" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + include { stage: "2-layer" } + inner_product_param { + num_output: 8801 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + bias_filler { + type: "constant" + value: 0 + } + axis: 2 + } +} +layer { + name: "cross_entropy_loss" + type: "SoftmaxWithLoss" + bottom: "predict" + bottom: "target_sentence" + top: "cross_entropy_loss" + loss_weight: 20 + loss_param { + ignore_label: -1 + } + softmax_param { + axis: 2 + } +} +layer { + name: "accuracy" + type: "Accuracy" + bottom: "predict" + bottom: "target_sentence" + top: "accuracy" + include { phase: TEST } + loss_param { + ignore_label: -1 + } +} diff --git a/examples/coco_caption/lrcn_solver.prototxt b/examples/coco_caption/lrcn_solver.prototxt new file mode 100644 index 00000000000..65ca272b30c --- /dev/null +++ 
b/examples/coco_caption/lrcn_solver.prototxt @@ -0,0 +1,30 @@ +net: "./examples/coco_caption/lrcn.prototxt" + +# lrcn.prototxt supports three variants of the LRCN architecture: +# (1) stage: 'factored' stage: '2-layer' +# (2) stage: 'unfactored' stage: '1-layer' +# (3) stage: 'unfactored' stage: '2-layer' +# This solver uses variant (1). +# To use a different variant, modify the states (train_state, test_state) +# below as appropriate: + +train_state: { stage: 'freeze-convnet' stage: 'factored' stage: '2-layer' } +test_iter: 25 +test_state: { stage: 'freeze-convnet' stage: 'factored' stage: '2-layer' stage: 'test-on-train' } +test_iter: 25 +test_state: { stage: 'freeze-convnet' stage: 'factored' stage: '2-layer' stage: 'test-on-val' } +test_interval: 1000 +base_lr: 0.01 +lr_policy: "step" +gamma: 0.5 +stepsize: 20000 +display: 1 +max_iter: 110000 +momentum: 0.9 +weight_decay: 0.0000 +snapshot: 5000 +snapshot_prefix: "./examples/coco_caption/lrcn" +solver_mode: GPU +random_seed: 1701 +average_loss: 100 +clip_gradients: 10 diff --git a/examples/coco_caption/lstm_language_model.prototxt b/examples/coco_caption/lstm_language_model.prototxt new file mode 100644 index 00000000000..3cf4f6a686f --- /dev/null +++ b/examples/coco_caption/lstm_language_model.prototxt @@ -0,0 +1,149 @@ +name: "lstm_language_model" +layer { + name: "data" + type: "HDF5Data" + top: "cont_sentence" + top: "input_sentence" + top: "target_sentence" + include { phase: TRAIN } + hdf5_data_param { + source: "./examples/coco_caption/h5_data/buffer_100/train_unaligned_batches/hdf5_chunk_list.txt" + batch_size: 20 + } +} +layer { + name: "data" + type: "HDF5Data" + top: "cont_sentence" + top: "input_sentence" + top: "target_sentence" + include { + phase: TEST + stage: "test-on-train" + } + hdf5_data_param { + source: "./examples/coco_caption/h5_data/buffer_100/train_unaligned_batches/hdf5_chunk_list.txt" + batch_size: 20 + } +} +layer { + name: "data" + type: "HDF5Data" + top: "cont_sentence" + top: 
"input_sentence" + top: "target_sentence" + include { + phase: TEST + stage: "test-on-val" + } + hdf5_data_param { + source: "./examples/coco_caption/h5_data/buffer_100/val_unaligned_batches/hdf5_chunk_list.txt" + batch_size: 20 + } +} +layer { + name: "embedding" + type: "Embed" + bottom: "input_sentence" + top: "embedded_input_sentence" + param { + lr_mult: 1 + } + embed_param { + bias_term: false + input_dim: 8801 # = vocab_size + 1 (for EOS) + num_output: 1000 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + } +} +layer { + name: "embed-drop" + type: "Dropout" + bottom: "embedded_input_sentence" + top: "embedded_input_sentence" + dropout_param { dropout_ratio: 0.5 } + include { stage: "embed-drop" } +} +layer { + name: "lstm1" + type: "LSTM" + bottom: "embedded_input_sentence" + bottom: "cont_sentence" + top: "lstm1" + recurrent_param { + num_output: 1000 + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "lstm-drop" + type: "Dropout" + bottom: "lstm1" + top: "lstm1" + dropout_param { dropout_ratio: 0.5 } + include { stage: "lstm-drop" } +} +layer { + name: "predict" + type: "InnerProduct" + bottom: "lstm1" + top: "predict" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 8801 # = vocab_size + 1 (+1 for EOS) + weight_filler { + type: "uniform" + min: -0.08 + max: 0.08 + } + bias_filler { + type: "constant" + value: 0 + } + axis: 2 + } +} +layer { + name: "cross_entropy_loss" + type: "SoftmaxWithLoss" + bottom: "predict" + bottom: "target_sentence" + top: "cross_entropy_loss" + loss_weight: 20 + loss_param { + ignore_label: -1 + } + softmax_param { + axis: 2 + } +} +layer { + name: "accuracy" + type: "Accuracy" + bottom: "predict" + bottom: "target_sentence" + top: "accuracy" + include { phase: TEST } + loss_param { + ignore_label: -1 + } +} diff --git 
a/examples/coco_caption/lstm_lm_solver.prototxt b/examples/coco_caption/lstm_lm_solver.prototxt new file mode 100644 index 00000000000..fb36ad15a5b --- /dev/null +++ b/examples/coco_caption/lstm_lm_solver.prototxt @@ -0,0 +1,21 @@ +net: "./examples/coco_caption/lstm_language_model.prototxt" +train_state: { stage: 'embed-drop' stage: 'lstm-drop' } +test_iter: 25 +test_state: { stage: 'test-on-train' } +test_iter: 25 +test_state: { stage: 'test-on-val' } +test_interval: 1000 +base_lr: 0.1 +lr_policy: "step" +gamma: 0.5 +stepsize: 20000 +display: 1 +max_iter: 110000 +momentum: 0.9 +weight_decay: 0.0000 +snapshot: 5000 +snapshot_prefix: "./examples/coco_caption/lstm_lm" +solver_mode: GPU +random_seed: 1701 +average_loss: 100 +clip_gradients: 10 diff --git a/examples/coco_caption/train_language_model.sh b/examples/coco_caption/train_language_model.sh new file mode 100755 index 00000000000..6e8a8c47b37 --- /dev/null +++ b/examples/coco_caption/train_language_model.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +GPU_ID=0 +DATA_DIR=./examples/coco_caption/h5_data/ +if [ ! -d $DATA_DIR ]; then + echo "Data directory not found: $DATA_DIR" + echo "First, download the COCO dataset (follow instructions in data/coco)" + echo "Then, run ./examples/coco_caption/coco_to_hdf5_data.py to create the Caffe input data" + exit 1 +fi + +./build/tools/caffe train \ + -solver ./examples/coco_caption/lstm_lm_solver.prototxt \ + -gpu $GPU_ID diff --git a/examples/coco_caption/train_lrcn.sh b/examples/coco_caption/train_lrcn.sh new file mode 100755 index 00000000000..5099e762ccd --- /dev/null +++ b/examples/coco_caption/train_lrcn.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +GPU_ID=0 +WEIGHTS=\ +./models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel +DATA_DIR=./examples/coco_caption/h5_data/ +if [ ! 
-d $DATA_DIR ]; then + echo "Data directory not found: $DATA_DIR" + echo "First, download the COCO dataset (follow instructions in data/coco)" + echo "Then, run ./examples/coco_caption/coco_to_hdf5_data.py to create the Caffe input data" + exit 1 +fi + +./build/tools/caffe train \ + -solver ./examples/coco_caption/lrcn_solver.prototxt \ + -weights $WEIGHTS \ + -gpu $GPU_ID diff --git a/examples/imagenet/readme.md b/examples/imagenet/readme.md index c2dd62ec963..a6bdf49ca4d 100644 --- a/examples/imagenet/readme.md +++ b/examples/imagenet/readme.md @@ -26,7 +26,7 @@ We assume that you already have downloaded the ImageNet training data and valida You will first need to prepare some auxiliary data for training. This data can be downloaded by: - ./data/get_ilsvrc_aux.sh + ./data/ilsvrc12/get_ilsvrc_aux.sh The training and validation input are described in `train.txt` and `val.txt` as text listing all the files and their labels. Note that we use a different indexing for labels than the ILSVRC devkit: we sort the synset names in their ASCII order, and then label them from 0 to 999. See `synset_words.txt` for the synset/name mapping. diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index 42e4420408c..36579a5a545 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -1,11 +1,17 @@ #ifndef CAFFE_BLOB_HPP_ #define CAFFE_BLOB_HPP_ +#include +#include +#include + #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/syncedmem.hpp" #include "caffe/util/math_functions.hpp" +const int kMaxBlobAxes = INT_MAX; + namespace caffe { /** @@ -19,10 +25,16 @@ template class Blob { public: Blob() - : data_(), diff_(), num_(0), channels_(0), height_(0), width_(0), - count_(0), capacity_(0) {} + : data_(), diff_(), count_(0), capacity_(0) {} + + /// @brief Deprecated; use Blob(const vector& shape). 
explicit Blob(const int num, const int channels, const int height, - const int width); + const int width); + explicit Blob(const vector& shape); + + /// @brief Deprecated; use Reshape(const vector& shape). + void Reshape(const int num, const int channels, const int height, + const int width); /** * @brief Change the dimensions of the blob, allocating new memory if * necessary. @@ -37,25 +49,133 @@ class Blob { * an error; either Net::Forward or Net::Reshape need to be called to * propagate the new input shape to higher layers. */ - void Reshape(const int num, const int channels, const int height, - const int width); + void Reshape(const vector& shape); + void Reshape(const BlobShape& shape); void ReshapeLike(const Blob& other); - inline int num() const { return num_; } - inline int channels() const { return channels_; } - inline int height() const { return height_; } - inline int width() const { return width_; } + inline string shape_string() const { + ostringstream stream; + for (int i = 0; i < shape_.size(); ++i) { + stream << shape_[i] << " "; + } + stream << "(" << count_ << ")"; + return stream.str(); + } + inline const vector& shape() const { return shape_; } + /** + * @brief Returns the dimension of the index-th axis (or the negative index-th + * axis from the end, if index is negative). + * + * @param index the axis index, which may be negative as it will be + * "canonicalized" using CanonicalAxisIndex. + * Dies on out of range index. + */ + inline int shape(int index) const { + return shape_[CanonicalAxisIndex(index)]; + } + inline int num_axes() const { return shape_.size(); } inline int count() const { return count_; } + + /** + * @brief Compute the volume of a slice; i.e., the product of dimensions + * among a range of axes. + * + * @param start_axis The first axis to include in the slice. + * + * @param end_axis The first axis to exclude from the slice. 
+ */ + inline int count(int start_axis, int end_axis) const { + CHECK_LE(start_axis, end_axis); + CHECK_GE(start_axis, 0); + CHECK_GE(end_axis, 0); + CHECK_LE(start_axis, num_axes()); + CHECK_LE(end_axis, num_axes()); + int count = 1; + for (int i = start_axis; i < end_axis; ++i) { + count *= shape(i); + } + return count; + } + /** + * @brief Compute the volume of a slice spanning from a particular first + * axis to the final axis. + * + * @param start_axis The first axis to include in the slice. + */ + inline int count(int start_axis) const { + return count(start_axis, num_axes()); + } + + /** + * @brief Returns the 'canonical' version of a (usually) user-specified axis, + * allowing for negative indexing (e.g., -1 for the last axis). + * + * @param index the axis index. + * If 0 <= index < num_axes(), return index. + * If -num_axes <= index <= -1, return (num_axes() - (-index)), + * e.g., the last axis index (num_axes() - 1) if index == -1, + * the second to last if index == -2, etc. + * Dies on out of range index. + */ + inline int CanonicalAxisIndex(int axis_index) const { + CHECK_GE(axis_index, -num_axes()) + << "axis " << axis_index << " out of range for " << num_axes() + << "-D Blob with shape " << shape_string(); + CHECK_LT(axis_index, num_axes()) + << "axis " << axis_index << " out of range for " << num_axes() + << "-D Blob with shape " << shape_string(); + if (axis_index < 0) { + return axis_index + num_axes(); + } + return axis_index; + } + + /// @brief Deprecated legacy shape accessor num: use shape(0) instead. + inline int num() const { return LegacyShape(0); } + /// @brief Deprecated legacy shape accessor channels: use shape(1) instead. + inline int channels() const { return LegacyShape(1); } + /// @brief Deprecated legacy shape accessor height: use shape(2) instead. + inline int height() const { return LegacyShape(2); } + /// @brief Deprecated legacy shape accessor width: use shape(3) instead. 
+ inline int width() const { return LegacyShape(3); } + inline int LegacyShape(int index) const { + CHECK_LE(num_axes(), 4) + << "Cannot use legacy accessors on Blobs with > 4 axes."; + CHECK_LT(index, 4); + CHECK_GE(index, -4); + if (index >= num_axes() || index < -num_axes()) { + // Axis is out of range, but still in [0, 3] (or [-4, -1] for reverse + // indexing) -- this special case simulates the one-padding used to fill + // extraneous axes of legacy blobs. + return 1; + } + return shape(index); + } + inline int offset(const int n, const int c = 0, const int h = 0, const int w = 0) const { CHECK_GE(n, 0); - CHECK_LE(n, num_); - CHECK_GE(channels_, 0); - CHECK_LE(c, channels_); - CHECK_GE(height_, 0); - CHECK_LE(h, height_); - CHECK_GE(width_, 0); - CHECK_LE(w, width_); - return ((n * channels_ + c) * height_ + h) * width_ + w; + CHECK_LE(n, num()); + CHECK_GE(channels(), 0); + CHECK_LE(c, channels()); + CHECK_GE(height(), 0); + CHECK_LE(h, height()); + CHECK_GE(width(), 0); + CHECK_LE(w, width()); + return ((n * channels() + c) * height() + h) * width() + w; + } + + inline int offset(const vector& indices) const { + CHECK_LE(indices.size(), num_axes()); + int offset = 0; + for (int i = 0; i < num_axes(); ++i) { + offset *= shape(i); + if (indices.size() > i) { + CHECK_GE(indices[i], 0); + CHECK_LT(indices[i], shape(i)); + offset += indices[i]; + } + } + return offset; } /** * @brief Copy from a source Blob. 
@@ -71,12 +191,20 @@ class Blob { inline Dtype data_at(const int n, const int c, const int h, const int w) const { - return *(cpu_data() + offset(n, c, h, w)); + return cpu_data()[offset(n, c, h, w)]; } inline Dtype diff_at(const int n, const int c, const int h, const int w) const { - return *(cpu_diff() + offset(n, c, h, w)); + return cpu_diff()[offset(n, c, h, w)]; + } + + inline Dtype data_at(const vector& index) const { + return cpu_data()[offset(index)]; + } + + inline Dtype diff_at(const vector& index) const { + return cpu_diff()[offset(index)]; } inline const shared_ptr& data() const { @@ -99,7 +227,7 @@ class Blob { Dtype* mutable_cpu_diff(); Dtype* mutable_gpu_diff(); void Update(); - void FromProto(const BlobProto& proto); + void FromProto(const BlobProto& proto, bool reshape = true); void ToProto(BlobProto* proto, bool write_diff = false) const; /// @brief Compute the sum of absolute values (L1 norm) of the data. @@ -135,13 +263,12 @@ class Blob { */ void ShareDiff(const Blob& other); + bool ShapeEquals(const BlobProto& other); + protected: shared_ptr data_; shared_ptr diff_; - int num_; - int channels_; - int height_; - int width_; + vector shape_; int count_; int capacity_; diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index c67822c3738..a23c671b7e9 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -85,7 +85,7 @@ class ConcatLayer : public Layer { const vector*>& top); virtual inline const char* type() const { return "Concat"; } - virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int MinBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: @@ -99,8 +99,8 @@ class ConcatLayer : public Layer { * - K @f$ (N \times C \times H \times W) @f$ * the inputs @f$ x_K @f$ * @param top output Blob vector (length 1) - * -# @f$ (KN \times C \times H \times W) @f$ if concat_dim == 0, or - * @f$ (N \times KC \times H \times W) @f$ 
if concat_dim == 1: + * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or + * @f$ (N \times KC \times H \times W) @f$ if axis == 1: * the concatenated output @f$ * y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}] * @f$ @@ -115,8 +115,8 @@ class ConcatLayer : public Layer { * * @param top output Blob vector (length 1), providing the error gradient with * respect to the outputs - * -# @f$ (KN \times C \times H \times W) @f$ if concat_dim == 0, or - * @f$ (N \times KC \times H \times W) @f$ if concat_dim == 1: + * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or + * @f$ (N \times KC \times H \times W) @f$ if axis == 1: * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ * with respect to concatenated outputs @f$ y @f$ * @param propagate_down see Layer::Backward. @@ -137,13 +137,10 @@ class ConcatLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - Blob col_bob_; int count_; - int num_; - int channels_; - int height_; - int width_; - int concat_dim_; + int num_concats_; + int concat_input_size_; + int concat_axis_; }; /** @@ -179,10 +176,49 @@ class EltwiseLayer : public Layer { EltwiseParameter_EltwiseOp op_; vector coeffs_; Blob max_idx_; + bool coeff_blob_; bool stable_prod_grad_; }; +/** + * @brief A layer for learning "embeddings" of one-hot vector input. + * Equivalent to an InnerProductLayer with one-hot vectors as input, but + * for efficiency the input is the "hot" index of each column itself. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
+ */ +template +class EmbedLayer : public Layer { + public: + explicit EmbedLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Embed"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int M_; + int K_; + int N_; + bool bias_term_; + Blob bias_multiplier_; +}; + /** * @brief Reshapes the input Blob into flat vectors. * @@ -215,9 +251,9 @@ class FlattenLayer : public Layer { * the outputs -- i.e., the (virtually) copied, flattened inputs */ virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); + const vector*>& top) {} virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); + const vector*>& top) {} /** * @brief Computes the error gradient w.r.t. the concatenate inputs. @@ -229,11 +265,9 @@ class FlattenLayer : public Layer { * gradient is (virtually) copied */ virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); + const vector& propagate_down, const vector*>& bottom) {} virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int count_; + const vector& propagate_down, const vector*>& bottom) {} }; /** @@ -306,6 +340,53 @@ class MVNLayer : public Layer { Blob sum_multiplier_; }; +/** + * @brief Reshapes an input Blob. 
+ */ +template +class ReshapeLayer : public Layer { + public: + explicit ReshapeLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Reshape"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (D_1 \times D_2 \times ... \times D_m) @f$ + * the inputs + * @param top output Blob vector (length 1) + * -# @f$ (d_1 \times d_2 \times ... \times d_n) @f$, + * the outputs -- i.e., the (virtually) copied inputs. + * The shape is specified by reshape_param.shape(), and the + * product of the dimensions in the new shape must match that of the + * input shape; i.e., @f$ d_1 d_2 ... d_n = D_1 D_2 ... D_m @f$. + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top) {} + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top) {} + + /** + * @brief Computes the error gradient w.r.t. the concatenate inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length K), into which the top error + * gradient is (virtually) copied + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} +}; + /** * @brief Ignores bottom blobs while producing no top blobs. (This is useful * to suppress outputs during testing.) 
@@ -362,6 +443,9 @@ class SoftmaxLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + int outer_num_; + int inner_num_; + int softmax_axis_; /// sum_multiplier is used to carry out sum using BLAS Blob sum_multiplier_; /// scale is an intermediate Blob to hold temporary results. @@ -446,7 +530,7 @@ class SliceLayer : public Layer { virtual inline const char* type() const { return "Slice"; } virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 2; } + virtual inline int MinTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, @@ -458,13 +542,10 @@ class SliceLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - Blob col_bob_; int count_; - int num_; - int channels_; - int height_; - int width_; - int slice_dim_; + int num_slices_; + int slice_size_; + int slice_axis_; vector slice_point_; }; diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index eebf565b1d5..bb18e8e1e28 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -79,9 +79,8 @@ class GaussianFiller : public Filler { // These have num == channels == 1; width is number of inputs; height is // number of outputs. The 'sparse' variable specifies the mean number // of non-zero input weights for a given output. 
- CHECK_EQ(blob->num(), 1); - CHECK_EQ(blob->channels(), 1); - int num_outputs = blob->height(); + CHECK_GE(blob->num_axes(), 1); + const int num_outputs = blob->shape(0); Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs); rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int))); int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 36413ccd176..4277269c938 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -78,7 +78,13 @@ class AccuracyLayer : public Layer { } } + /// Whether to ignore instances with a certain label. + bool has_ignore_label_; + /// The label indicating that an instance should be ignored. + int ignore_label_; + int top_k_; + Dtype denominator_; }; /** @@ -754,6 +760,8 @@ class SoftmaxWithLossLayer : public LossLayer { /// Whether to normalize the loss by the total number of values present /// (otherwise just by the batch size). bool normalize_; + + int softmax_axis_, outer_num_, inner_num_; }; } // namespace caffe diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 075afebc9b0..10fc1705192 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -84,6 +84,13 @@ class Net { /// @brief Updates the network weights based on the diff values computed. void Update(); + /** + * @brief Shares weight data of owner blobs with shared blobs. + * + * Note: this is called by Net::Init, and thus should normally not be + * called manually. 
+ */ + void ShareWeightData(); /** * @brief For an already initialized net, implicitly copies (i.e., using no @@ -150,6 +157,9 @@ class Net { return param_names_index_; } inline const vector& param_owners() const { return param_owners_; } + inline const vector& param_display_names() const { + return param_display_names_; + } /// @brief Input and output blob numbers inline int num_inputs() const { return net_input_blobs_.size(); } inline int num_outputs() const { return net_output_blobs_.size(); } diff --git a/include/caffe/sequence_layers.hpp b/include/caffe/sequence_layers.hpp new file mode 100644 index 00000000000..8ac735435a4 --- /dev/null +++ b/include/caffe/sequence_layers.hpp @@ -0,0 +1,312 @@ +#ifndef CAFFE_SEQUENCE_LAYERS_HPP_ +#define CAFFE_SEQUENCE_LAYERS_HPP_ + +#include +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/net.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +template class RecurrentLayer; + +/** + * @brief An abstract class for implementing recurrent behavior inside of an + * unrolled network. This Layer type cannot be instantiated -- instaed, + * you should use one of its implementations which defines the recurrent + * architecture, such as RNNLayer or LSTMLayer. + */ +template +class RecurrentLayer : public Layer { + public: + explicit RecurrentLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual void Reset(); + + virtual inline const char* type() const { return "Recurrent"; } + virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int MaxBottomBlobs() const { return 3; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + virtual inline bool AllowForceBackward(const int bottom_index) const { + // Can't propagate to sequence continuation indicators. 
+ return bottom_index != 1; + } + + protected: + /** + * @brief Fills net_param with the recurrent network arcthiecture. Subclasses + * should define this -- see RNNLayer and LSTMLayer for examples. + */ + virtual void FillUnrolledNet(NetParameter* net_param) const = 0; + + /** + * @brief Fills names with the names of the 0th timestep recurrent input + * Blob&s. Subclasses should define this -- see RNNLayer and LSTMLayer + * for examples. + */ + virtual void RecurrentInputBlobNames(vector* names) const = 0; + + /** + * @brief Fills names with the names of the Tth timestep recurrent output + * Blob&s. Subclasses should define this -- see RNNLayer and LSTMLayer + * for examples. + */ + virtual void RecurrentOutputBlobNames(vector* names) const = 0; + + /** + * @brief Fills names with the names of the output blobs, concatenated across + * all timesteps. Should return a name for each top Blob. + * Subclasses should define this -- see RNNLayer and LSTMLayer for + * examples. + */ + virtual void OutputBlobNames(vector* names) const = 0; + + /** + * @param bottom input Blob vector (length 2-3) + * + * -# @f$ (T \times N \times ...) @f$ + * the time-varying input @f$ x @f$. After the first two axes, whose + * dimensions must correspond to the number of timesteps @f$ T @f$ and + * the number of independent streams @f$ N @f$, respectively, its + * dimensions may be arbitrary. Note that the ordering of dimensions -- + * @f$ (T \times N \times ...) @f$, rather than + * @f$ (N \times T \times ...) @f$ -- means that the @f$ N @f$ + * independent input streams must be "interleaved". + * + * -# @f$ (T \times N) @f$ + * the sequence continuation indicators @f$ \delta @f$. 
+ * These inputs should be binary (0 or 1) indicators, where + * @f$ \delta_{t,n} = 0 @f$ means that timestep @f$ t @f$ of stream + * @f$ n @f$ is the beginning of a new sequence, and hence the previous + * hidden state @f$ h_{t-1} @f$ is multiplied by @f$ \delta_t = 0 @f$ + * and has no effect on the cell's output at timestep @f$ t @f$, and + * a value of @f$ \delta_{t,n} = 1 @f$ means that timestep @f$ t @f$ of + * stream @f$ n @f$ is a continuation from the previous timestep + * @f$ t-1 @f$, and the previous hidden state @f$ h_{t-1} @f$ affects the + * updated hidden state and output. + * + * -# @f$ (N \times ...) @f$ (optional) + * the static (non-time-varying) input @f$ x_{static} @f$. + * After the first axis, whose dimension must be the number of + * independent streams, its dimensions may be arbitrary. + * This is mathematically equivalent to using a time-varying input of + * @f$ x'_t = [x_t; x_{static}] @f$ -- i.e., tiling the static input + * across the @f$ T @f$ timesteps and concatenating with the time-varying + * input. Note that if this input is used, all timesteps in a single + * batch within a particular one of the @f$ N @f$ streams must share the + * same static input, even if the sequence continuation indicators + * suggest that difference sequences are ending and beginning within a + * single batch. This may require padding and/or truncation for uniform + * length. + * + * @param top output Blob vector (length 1) + * -# @f$ (T \times N \times D) @f$ + * the time-varying output @f$ y @f$, where @f$ D @f$ is + * recurrent_param.num_output(). + * Refer to documentation for particular RecurrentLayer implementations + * (such as RNNLayer and LSTMLayer) for the definition of @f$ y @f$. 
+ */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// @brief A helper function, useful for stringifying timestep indices. + virtual string int_to_str(const int t) const; + + /// @brief A Net to implement the Recurrent functionality. + shared_ptr > unrolled_net_; + + /// @brief The number of independent streams to process simultaneously. + int N_; + + /** + * @brief The number of timesteps in the layer's input, and the number of + * timesteps over which to backpropagate through time. + */ + int T_; + + /// @brief Whether the layer has a "static" input copied across all timesteps. + bool static_input_; + + vector* > recur_input_blobs_; + vector* > recur_output_blobs_; + vector* > output_blobs_; + Blob* x_input_blob_; + Blob* x_static_input_blob_; + Blob* cont_input_blob_; +}; + +/** + * @brief Processes sequential inputs using a "Long Short-Term Memory" (LSTM) + * [1] style recurrent neural network (RNN). Implemented as a network + * unrolled the LSTM computation in time. + * + * + * The specific architecture used in this implementation is as described in + * "Learning to Execute" [2], reproduced below: + * i_t := \sigmoid[ W_{hi} * h_{t-1} + W_{xi} * x_t + b_i ] + * f_t := \sigmoid[ W_{hf} * h_{t-1} + W_{xf} * x_t + b_f ] + * o_t := \sigmoid[ W_{ho} * h_{t-1} + W_{xo} * x_t + b_o ] + * g_t := \tanh[ W_{hg} * h_{t-1} + W_{xg} * x_t + b_g ] + * c_t := (f_t .* c_{t-1}) + (i_t .* g_t) + * h_t := o_t .* \tanh[c_t] + * In the implementation, the i, f, o, and g computations are performed as a + * single inner product. + * + * Notably, this implementation lacks the "diagonal" gates, as used in the + * LSTM architectures described by Alex Graves [3] and others. + * + * [1] Hochreiter, Sepp, and Schmidhuber, Jürgen. "Long short-term memory." 
+ * Neural Computation 9, no. 8 (1997): 1735-1780. + * + * [2] Zaremba, Wojciech, and Sutskever, Ilya. "Learning to execute." + * arXiv preprint arXiv:1410.4615 (2014). + * + * [3] Graves, Alex. "Generating sequences with recurrent neural networks." + * arXiv preprint arXiv:1308.0850 (2013). + */ +template +class LSTMLayer : public RecurrentLayer { + public: + explicit LSTMLayer(const LayerParameter& param) + : RecurrentLayer(param) {} + + virtual inline const char* type() const { return "LSTM"; } + + protected: + virtual void FillUnrolledNet(NetParameter* net_param) const; + virtual void RecurrentInputBlobNames(vector* names) const; + virtual void RecurrentOutputBlobNames(vector* names) const; + virtual void OutputBlobNames(vector* names) const; +}; + +/** + * @brief A helper for LSTMLayer: computes a single timestep of the + * non-linearity of the LSTM, producing the updated cell and hidden + * states. + */ +template +class LSTMUnitLayer : public Layer { + public: + explicit LSTMUnitLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "LSTMUnit"; } + virtual inline int ExactNumBottomBlobs() const { return 3; } + virtual inline int ExactNumTopBlobs() const { return 2; } + + virtual inline bool AllowForceBackward(const int bottom_index) const { + // Can't propagate to sequence continuation indicators. 
+ return bottom_index != 2; + } + + protected: + /** + * @param bottom input Blob vector (length 3) + * -# @f$ (1 \times N \times D) @f$ + * the previous timestep cell state @f$ c_{t-1} @f$ + * -# @f$ (1 \times N \times 4D) @f$ + * the "gate inputs" @f$ [i_t', f_t', o_t', g_t'] @f$ + * -# @f$ (1 \times 1 \times N) @f$ + * the sequence continuation indicators @f$ \delta_t @f$ + * @param top output Blob vector (length 2) + * -# @f$ (1 \times N \times D) @f$ + * the updated cell state @f$ c_t @f$, computed as: + * i_t := \sigmoid[i_t'] + * f_t := \sigmoid[f_t'] + * o_t := \sigmoid[o_t'] + * g_t := \tanh[g_t'] + * c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t) + * -# @f$ (1 \times N \times D) @f$ + * the updated hidden state @f$ h_t @f$, computed as: + * h_t := o_t .* \tanh[c_t] + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the LSTMUnit inputs. + * + * @param top output Blob vector (length 2), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times N \times D) @f$: + * containing error gradients @f$ \frac{\partial E}{\partial c_t} @f$ + * with respect to the updated cell state @f$ c_t @f$ + * -# @f$ (1 \times N \times D) @f$: + * containing error gradients @f$ \frac{\partial E}{\partial h_t} @f$ + * with respect to the updated hidden state @f$ h_t @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 3), into which the error gradients + * with respect to the LSTMUnit inputs @f$ c_{t-1} @f$ and the gate + * inputs are computed. Computation of the error gradients w.r.t. + * the sequence indicators is not implemented. + * -# @f$ (1 \times N \times D) @f$ + * the error gradient w.r.t. the previous timestep cell state + * @f$ c_{t-1} @f$ + * -# @f$ (1 \times N \times 4D) @f$ + * the error gradient w.r.t.
the "gate inputs" + * @f$ [ + * \frac{\partial E}{\partial i_t} + * \frac{\partial E}{\partial f_t} + * \frac{\partial E}{\partial o_t} + * \frac{\partial E}{\partial g_t} + * ] @f$ + * -# @f$ (1 \times 1 \times N) @f$ + * the gradient w.r.t. the sequence continuation indicators + * @f$ \delta_t @f$ is currently not computed. + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// @brief The hidden and output dimension. + int hidden_dim_; + Blob X_acts_; +}; + +/** + * @brief Processes time-varying inputs using a simple recurrent neural network + * (RNN). Implemented as a network unrolling the RNN computation in time. + * + * Given time-varying inputs @f$ x_t @f$, computes hidden state @f$ + * h_t := \tanh[ W_{hh} h_{t-1} + W_{xh} x_t + b_h ] + * @f$, and outputs @f$ + * o_t := \tanh[ W_{ho} h_t + b_o ] + * @f$. + */ +template +class RNNLayer : public RecurrentLayer { + public: + explicit RNNLayer(const LayerParameter& param) + : RecurrentLayer(param) {} + + virtual inline const char* type() const { return "RNN"; } + + protected: + virtual void FillUnrolledNet(NetParameter* net_param) const; + virtual void RecurrentInputBlobNames(vector* names) const; + virtual void RecurrentOutputBlobNames(vector* names) const; + virtual void OutputBlobNames(vector* names) const; +}; + +} // namespace caffe + +#endif // CAFFE_SEQUENCE_LAYERS_HPP_ diff --git a/include/caffe/test/test_gradient_check_util.hpp b/include/caffe/test/test_gradient_check_util.hpp index 22937711b58..25f35d1589e 100644 --- a/include/caffe/test/test_gradient_check_util.hpp +++ b/include/caffe/test/test_gradient_check_util.hpp @@ -45,6 +45,10 @@ class GradientChecker { void CheckGradientEltwise(Layer* layer, const vector*>& bottom, const vector*>& top); + // Checks the gradient of a single output with respect to particular input + // blob(s).
If check_bottom = i >= 0, check only the ith bottom Blob. + // If check_bottom == -1, check everything -- all bottom Blobs and all + // param Blobs. Otherwise (if check_bottom < -1), check only param Blobs. void CheckGradientSingle(Layer* layer, const vector*>& bottom, const vector*>& top, int check_bottom, int top_id, int top_data_id, bool element_wise = false); @@ -80,21 +84,25 @@ void GradientChecker::CheckGradientSingle(Layer* layer, CHECK_EQ(top_count, bottom[blob_id]->count()); } } - // First, figure out what blobs we need to check against. + // First, figure out what blobs we need to check against, and zero init + // parameter blobs. vector*> blobs_to_check; - vector propagate_down(bottom.size(), check_bottom < 0); + vector propagate_down(bottom.size(), check_bottom == -1); for (int i = 0; i < layer->blobs().size(); ++i) { - blobs_to_check.push_back(layer->blobs()[i].get()); + Blob* blob = layer->blobs()[i].get(); + caffe_set(blob->count(), static_cast(0), blob->mutable_cpu_diff()); + blobs_to_check.push_back(blob); } - if (check_bottom < 0) { + if (check_bottom == -1) { for (int i = 0; i < bottom.size(); ++i) { blobs_to_check.push_back(bottom[i]); } - } else { + } else if (check_bottom >= 0) { CHECK_LT(check_bottom, bottom.size()); blobs_to_check.push_back(bottom[check_bottom]); propagate_down[check_bottom] = true; } + CHECK_GT(blobs_to_check.size(), 0) << "No blobs to check."; // Compute the gradient analytically using Backward Caffe::set_random_seed(seed_); // Ignore the loss from the layer (it's just the weighted sum of the losses diff --git a/include/caffe/util/gpu_util.cuh b/include/caffe/util/gpu_util.cuh new file mode 100644 index 00000000000..994202f2a1a --- /dev/null +++ b/include/caffe/util/gpu_util.cuh @@ -0,0 +1,35 @@ +#ifndef CAFFE_UTIL_GPU_UTIL_H_ +#define CAFFE_UTIL_GPU_UTIL_H_ + +namespace caffe { + +template +inline __device__ Dtype caffe_gpu_atomic_add(const Dtype val, Dtype* address); + +template <> +inline __device__ +float 
caffe_gpu_atomic_add(const float val, float* address) { + return atomicAdd(address, val); +} + +// double atomicAdd implementation taken from: +// http://docs.nvidia.com/cuda/cuda-c-programming-guide/#axzz3PVCpVsEG +template <> +inline __device__ +double caffe_gpu_atomic_add(const double val, double* address) { + unsigned long long int* address_as_ull = // NOLINT(runtime/int) + // NOLINT_NEXT_LINE(runtime/int) + reinterpret_cast(address); + unsigned long long int old = *address_as_ull; // NOLINT(runtime/int) + unsigned long long int assumed; // NOLINT(runtime/int) + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} + +} // namespace caffe + +#endif // CAFFE_UTIL_GPU_UTIL_H_ diff --git a/matlab/caffe/matcaffe.cpp b/matlab/caffe/matcaffe.cpp index 996d3d2149c..da37d920b20 100644 --- a/matlab/caffe/matcaffe.cpp +++ b/matlab/caffe/matcaffe.cpp @@ -272,7 +272,7 @@ static void get_init_key(MEX_ARGS) { static void init(MEX_ARGS) { if (nrhs != 3) { ostringstream error_msg; - error_msg << "Expected 2 arguments, got " << nrhs; + error_msg << "Expected 3 arguments, got " << nrhs; mex_error(error_msg.str()); } diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6afed4fa183..df0401daa1c 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -22,9 +22,13 @@ if(UNIX OR APPLE) endif() # ---[ Install -file(GLOB files *.py requirements.txt) -install(FILES ${files} DESTINATION python) -install(DIRECTORY caffe DESTINATION python) -install(TARGETS pycaffe DESTINATION python/caffe) +file(GLOB files1 *.py requirements.txt) +install(FILES ${files1} DESTINATION python) + +file(GLOB files2 caffe/*.py) +install(FILES ${files2} DESTINATION python/caffe) +install(TARGETS pycaffe DESTINATION python/caffe) +install(DIRECTORY caffe/imagenet caffe/proto caffe/test DESTINATION python/caffe) + diff --git a/python/caffe/_caffe.cpp 
b/python/caffe/_caffe.cpp index a5d0e64605e..bfea0de661b 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -163,9 +164,10 @@ struct NdarrayCallPolicies : public bp::default_call_policies { // the shape information from the blob. void* data = PyArray_DATA(reinterpret_cast(result)); Py_DECREF(result); - npy_intp dims[] = {blob->num(), blob->channels(), - blob->height(), blob->width()}; - PyObject* arr_obj = PyArray_SimpleNewFromData(4, dims, NPY_FLOAT32, data); + const int num_axes = blob->num_axes(); + vector dims(blob->shape().begin(), blob->shape().end()); + PyObject *arr_obj = PyArray_SimpleNewFromData(num_axes, dims.data(), + NPY_FLOAT32, data); // SetBaseObject steals a ref, so we need to INCREF. Py_INCREF(pyblob.ptr()); PyArray_SetBaseObject(reinterpret_cast(arr_obj), @@ -174,6 +176,20 @@ struct NdarrayCallPolicies : public bp::default_call_policies { } }; +bp::object Blob_Reshape(bp::tuple args, bp::dict kwargs) { + if (bp::len(kwargs) > 0) { + throw std::runtime_error("Blob.reshape takes no kwargs"); + } + Blob* self = bp::extract*>(args[0]); + vector shape(bp::len(args) - 1); + for (int i = 1; i < bp::len(args); ++i) { + shape[i - 1] = bp::extract(args[i]); + } + self->Reshape(shape); + // We need to explicitly return None to use bp::raw_function. 
+ return bp::object(); +} + BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { @@ -218,8 +234,9 @@ BOOST_PYTHON_MODULE(_caffe) { .add_property("channels", &Blob::channels) .add_property("height", &Blob::height) .add_property("width", &Blob::width) - .add_property("count", &Blob::count) - .def("reshape", &Blob::Reshape) + .add_property("count", static_cast::*)() const>( + &Blob::count)) + .def("reshape", bp::raw_function(&Blob_Reshape)) .add_property("data", bp::make_function(&Blob::mutable_cpu_data, NdarrayCallPolicies())) .add_property("diff", bp::make_function(&Blob::mutable_cpu_diff, @@ -275,7 +292,9 @@ BOOST_PYTHON_MODULE(_caffe) { bp::class_ >("BoolVec") .def(bp::vector_indexing_suite >()); - import_array(); + // boost python expects a void (missing) return value, while import_array + // returns NULL for python3. import_array1() forces a void return value. + import_array1(); } } // namespace caffe diff --git a/python/caffe/io.py b/python/caffe/io.py index 0ce9ecfeeed..f7f75b73cd8 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -3,7 +3,14 @@ from scipy.ndimage import zoom from skimage.transform import resize -from caffe.proto import caffe_pb2 +try: + # Python3 will most likely not be able to load protobuf + from caffe.proto import caffe_pb2 +except: + if sys.version_info >= (3,0): + print("Failed to include caffe_pb2, things might go wrong!") + else: + raise ## proto / datum / ndarray conversion @@ -231,11 +238,16 @@ def set_mean(self, in_, mean): """ self.__check_input(in_) if mean.ndim == 1: + # broadcast pixel mean = mean[:, np.newaxis, np.newaxis] - mk, mh, mw = mean.shape - in_k, in_h, in_w = self.inputs[in_][1:] - #if mk != in_k or (mh, mw) != (in_h, in_w) and (mh, mw) != (1, 1): - # raise Exception('Mean shape incompatible with input shape.') + else: + ms = mean.shape + if len(ms) == 2: + ms = (1,) + ms + if len(ms) != 3: + raise ValueError('Mean shape invalid') + if ms != self.inputs[in_][1:]: 
+ raise ValueError('Mean shape incompatible with input shape.') self.mean[in_] = mean diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 31c145d77a5..3c19261f690 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -4,7 +4,10 @@ """ from collections import OrderedDict -from itertools import izip_longest +try: + from itertools import izip_longest +except: + from itertools import zip_longest as izip_longest import numpy as np from ._caffe import Net, SGDSolver @@ -38,12 +41,12 @@ def _Net_params(self): @property def _Net_inputs(self): - return [self.blobs.keys()[i] for i in self._inputs] + return [list(self.blobs.keys())[i] for i in self._inputs] @property def _Net_outputs(self): - return [self.blobs.keys()[i] for i in self._outputs] + return [list(self.blobs.keys())[i] for i in self._outputs] def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): @@ -82,8 +85,6 @@ def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): # Set input according to defined shapes and make arrays single and # C-contiguous as Caffe expects. for in_, blob in kwargs.iteritems(): - if blob.ndim != 4: - raise Exception('{} blob is not 4-d'.format(in_)) if blob.shape[0] != self.blobs[in_].num: raise Exception('Input is not batch sized') self.blobs[in_].data[...] = blob diff --git a/python/caffe/test/test_python_layer.py b/python/caffe/test/test_python_layer.py index 383c283959d..dd99f6f15b9 100644 --- a/python/caffe/test/test_python_layer.py +++ b/python/caffe/test/test_python_layer.py @@ -11,8 +11,7 @@ def setup(self, bottom, top): pass def reshape(self, bottom, top): - top[0].reshape(bottom[0].num, bottom[0].channels, bottom[0].height, - bottom[0].width) + top[0].reshape(*bottom[0].data.shape) def forward(self, bottom, top): top[0].data[...] = 10 * bottom[0].data @@ -21,17 +20,16 @@ def backward(self, top, propagate_down, bottom): bottom[0].diff[...] 
= 10 * top[0].diff def python_net_file(): - f = tempfile.NamedTemporaryFile(delete=False) - f.write("""name: 'pythonnet' force_backward: true - input: 'data' input_dim: 10 input_dim: 9 input_dim: 8 input_dim: 7 - layer { type: 'Python' name: 'one' bottom: 'data' top: 'one' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } - layer { type: 'Python' name: 'two' bottom: 'one' top: 'two' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } - layer { type: 'Python' name: 'three' bottom: 'two' top: 'three' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } }""") - f.close() - return f.name + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write("""name: 'pythonnet' force_backward: true + input: 'data' input_shape { dim: 10 dim: 9 dim: 8 } + layer { type: 'Python' name: 'one' bottom: 'data' top: 'one' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } + layer { type: 'Python' name: 'two' bottom: 'one' top: 'two' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } + layer { type: 'Python' name: 'three' bottom: 'two' top: 'three' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } }""") + return f.name class TestPythonLayer(unittest.TestCase): def setUp(self): diff --git a/python/classify.py b/python/classify.py index d435a572266..81d06369341 100755 --- a/python/classify.py +++ b/python/classify.py @@ -103,7 +103,7 @@ def main(argv): channel_swap=channel_swap) if args.gpu: - print 'GPU mode' + print('GPU mode') # Load numpy array (.npy), directory glob (*.jpg), or image file. args.input_file = os.path.expanduser(args.input_file) @@ -115,12 +115,12 @@ def main(argv): else: inputs = [caffe.io.load_image(args.input_file)] - print "Classifying %d inputs." % len(inputs) + print("Classifying %d inputs." % len(inputs)) # Classify. start = time.time() predictions = classifier.predict(inputs, not args.center_only) - print "Done in %.2f s." 
% (time.time() - start) + print("Done in %.2f s." % (time.time() - start)) # Save np.save(args.output_file, predictions) diff --git a/python/detect.py b/python/detect.py index cb0c2645761..d395bd97abf 100755 --- a/python/detect.py +++ b/python/detect.py @@ -115,7 +115,7 @@ def main(argv): context_pad=args.context_pad) if args.gpu: - print 'GPU mode' + print('GPU mode') # Load input. t = time.time() diff --git a/python/draw_net.py b/python/draw_net.py index 4457b793e86..6320f775ef7 100755 --- a/python/draw_net.py +++ b/python/draw_net.py @@ -36,7 +36,7 @@ def main(): args = parse_args() net = caffe_pb2.NetParameter() text_format.Merge(open(args.input_net_proto_file).read(), net) - print 'Drawing net to %s' % args.output_image_file + print('Drawing net to %s' % args.output_image_file) caffe.draw.draw_net_to_file(net, args.output_image_file, args.rankdir) diff --git a/scripts/cpp_lint.py b/scripts/cpp_lint.py index 1b7c6c0536c..f750489f4f9 100755 --- a/scripts/cpp_lint.py +++ b/scripts/cpp_lint.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python2 # # Copyright (c) 2009 Google Inc. All rights reserved. 
# diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index fbc1361a19d..6d2b3f502d9 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -1,3 +1,6 @@ +#include +#include + #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/syncedmem.hpp" @@ -8,15 +11,24 @@ namespace caffe { template void Blob::Reshape(const int num, const int channels, const int height, const int width) { - CHECK_GE(num, 0); - CHECK_GE(channels, 0); - CHECK_GE(height, 0); - CHECK_GE(width, 0); - num_ = num; - channels_ = channels; - height_ = height; - width_ = width; - count_ = num_ * channels_ * height_ * width_; + vector shape(4); + shape[0] = num; + shape[1] = channels; + shape[2] = height; + shape[3] = width; + Reshape(shape); +} + +template +void Blob::Reshape(const vector& shape) { + CHECK_LE(shape.size(), kMaxBlobAxes); + count_ = 1; + shape_.resize(shape.size()); + for (int i = 0; i < shape.size(); ++i) { + CHECK_GE(shape[i], 0); + count_ *= shape[i]; + shape_[i] = shape[i]; + } if (count_ > capacity_) { capacity_ = count_; data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); @@ -24,9 +36,19 @@ void Blob::Reshape(const int num, const int channels, const int height, } } +template +void Blob::Reshape(const BlobShape& shape) { + CHECK_LE(shape.dim_size(), kMaxBlobAxes); + vector shape_vec(shape.dim_size()); + for (int i = 0; i < shape.dim_size(); ++i) { + shape_vec[i] = shape.dim(i); + } + Reshape(shape_vec); +} + template void Blob::ReshapeLike(const Blob& other) { - Reshape(other.num(), other.channels(), other.height(), other.width()); + Reshape(other.shape()); } template @@ -37,6 +59,13 @@ Blob::Blob(const int num, const int channels, const int height, Reshape(num, channels, height, width); } +template +Blob::Blob(const vector& shape) + // capacity_ must be initialized before calling Reshape + : capacity_(0) { + Reshape(shape); +} + template const Dtype* Blob::cpu_data() const { CHECK(data_); @@ -345,12 +374,34 @@ void Blob::scale_diff(Dtype scale_factor) { 
} } +template +bool Blob::ShapeEquals(const BlobProto& other) { + if (other.has_num() || other.has_channels() || + other.has_height() || other.has_width()) { + // Using deprecated 4D Blob dimensions -- + // shape is (num, channels, height, width). + // Note: we do not use the normal Blob::num(), Blob::channels(), etc. + // methods as these index from the beginning of the blob shape, where legacy + // parameter blobs were indexed from the end of the blob shape (e.g., bias + // Blob shape (1 x 1 x 1 x N), IP layer weight Blob shape (1 x 1 x M x N)). + return shape_.size() <= 4 && + LegacyShape(-4) == other.num() && + LegacyShape(-3) == other.channels() && + LegacyShape(-2) == other.height() && + LegacyShape(-1) == other.width(); + } + vector other_shape(other.shape().dim_size()); + for (int i = 0; i < other.shape().dim_size(); ++i) { + other_shape[i] = other.shape().dim(i); + } + return shape_ == other_shape; +} + template void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { - if (num_ != source.num() || channels_ != source.channels() || - height_ != source.height() || width_ != source.width()) { + if (source.count() != count_ || source.shape() != shape_) { if (reshape) { - Reshape(source.num(), source.channels(), source.height(), source.width()); + ReshapeLike(source); } else { LOG(FATAL) << "Trying to copy blobs of different sizes."; } @@ -380,8 +431,28 @@ void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { } template -void Blob::FromProto(const BlobProto& proto) { - Reshape(proto.num(), proto.channels(), proto.height(), proto.width()); +void Blob::FromProto(const BlobProto& proto, bool reshape) { + if (reshape) { + vector shape; + if (proto.has_num() || proto.has_channels() || + proto.has_height() || proto.has_width()) { + // Using deprecated 4D Blob dimensions -- + // shape is (num, channels, height, width). 
+ shape.resize(4); + shape[0] = proto.num(); + shape[1] = proto.channels(); + shape[2] = proto.height(); + shape[3] = proto.width(); + } else { + shape.resize(proto.shape().dim_size()); + for (int i = 0; i < proto.shape().dim_size(); ++i) { + shape[i] = proto.shape().dim(i); + } + } + Reshape(shape); + } else { + CHECK(ShapeEquals(proto)) << "shape mismatch (reshape not set)"; + } // copy data Dtype* data_vec = mutable_cpu_data(); for (int i = 0; i < count_; ++i) { @@ -397,10 +468,10 @@ void Blob::FromProto(const BlobProto& proto) { template void Blob::ToProto(BlobProto* proto, bool write_diff) const { - proto->set_num(num_); - proto->set_channels(channels_); - proto->set_height(height_); - proto->set_width(width_); + proto->clear_shape(); + for (int i = 0; i < shape_.size(); ++i) { + proto->mutable_shape()->add_dim(shape_[i]); + } proto->clear_data(); proto->clear_diff(); const Dtype* data_vec = cpu_data(); diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp index 3e8df34c0d6..539a7f175b6 100644 --- a/src/caffe/layers/accuracy_layer.cpp +++ b/src/caffe/layers/accuracy_layer.cpp @@ -14,19 +14,29 @@ template void AccuracyLayer::LayerSetUp( const vector*>& bottom, const vector*>& top) { top_k_ = this->layer_param_.accuracy_param().top_k(); + denominator_ = this->layer_param_.accuracy_param().denominator(); + CHECK_GE(denominator_, 0) + << "Denominator must be positive; or 0, for the batch size."; + + has_ignore_label_ = + this->layer_param_.loss_param().has_ignore_label(); + if (has_ignore_label_) { + ignore_label_ = this->layer_param_.loss_param().ignore_label(); + } } template void AccuracyLayer::Reshape( const vector*>& bottom, const vector*>& top) { - CHECK_EQ(bottom[0]->num(), bottom[1]->num()) - << "The data and label should have the same number."; - CHECK_LE(top_k_, bottom[0]->count() / bottom[0]->num()) + CHECK_LE(top_k_, bottom[0]->count() / bottom[1]->count()) << "top_k must be less than or equal to the number of 
classes."; - CHECK_EQ(bottom[1]->channels(), 1); - CHECK_EQ(bottom[1]->height(), 1); - CHECK_EQ(bottom[1]->width(), 1); - top[0]->Reshape(1, 1, 1, 1); + CHECK_GE(bottom[0]->num_axes(), bottom[1]->num_axes()); + for (int i = 0; i < bottom[1]->num_axes(); ++i) { + CHECK_LE(bottom[0]->shape(i), bottom[1]->shape(i)) + << "Dimension mismatch between predictions and label."; + } + vector top_shape(0); // Accuracy is a scalar; 0 axes. + top[0]->Reshape(top_shape); } template @@ -35,10 +45,11 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, Dtype accuracy = 0; const Dtype* bottom_data = bottom[0]->cpu_data(); const Dtype* bottom_label = bottom[1]->cpu_data(); - int num = bottom[0]->num(); - int dim = bottom[0]->count() / bottom[0]->num(); + int num = bottom[1]->count(); + int dim = bottom[0]->count() / num; vector maxval(top_k_+1); vector max_id(top_k_+1); + int count = 0; for (int i = 0; i < num; ++i) { // Top-k accuracy std::vector > bottom_data_vector; @@ -50,8 +61,13 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, bottom_data_vector.begin(), bottom_data_vector.begin() + top_k_, bottom_data_vector.end(), std::greater >()); // check if true label is in top k predictions + const int label_value = static_cast(bottom_label[i]); + if (has_ignore_label_ && label_value == ignore_label_) { + continue; + } + ++count; for (int k = 0; k < top_k_; k++) { - if (bottom_data_vector[k].second == static_cast(bottom_label[i])) { + if (bottom_data_vector[k].second == label_value) { ++accuracy; break; } @@ -59,7 +75,8 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, } // LOG(INFO) << "Accuracy: " << accuracy; - top[0]->mutable_cpu_data()[0] = accuracy / num; + const Dtype denominator = (denominator_ == 0) ? count : denominator_; + top[0]->mutable_cpu_data()[0] = accuracy / denominator; // Accuracy layer should not be used as a loss function. 
} diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index dccd5170c11..ccb3adc7e89 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -11,6 +11,8 @@ namespace caffe { template void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; // Configure the kernel size, padding, stride, and inputs. ConvolutionParameter conv_param = this->layer_param_.convolution_param(); CHECK(!conv_param.has_kernel_size() != @@ -85,10 +87,10 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, shared_ptr > weight_filler(GetFiller( this->layer_param_.convolution_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); - // If necessary, initialize and fill the biases: - // 1 x 1 x 1 x output channels + // If necessary, initialize and fill the biases. if (bias_term_) { - this->blobs_[1].reset(new Blob(1, 1, 1, num_output_)); + vector bias_shape(1, num_output_); + this->blobs_[1].reset(new Blob(bias_shape)); shared_ptr > bias_filler(GetFiller( this->layer_param_.convolution_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); @@ -101,6 +103,8 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, template void BaseConvolutionLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; num_ = bottom[0]->num(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); @@ -144,7 +148,8 @@ void BaseConvolutionLayer::Reshape(const vector*>& bottom, } // Set up the all ones "bias multiplier" for adding biases by BLAS if (bias_term_) { - bias_multiplier_.Reshape(1, 1, 1, height_out_ * width_out_); + vector bias_multiplier_shape(1, height_out_ * width_out_); + 
bias_multiplier_.Reshape(bias_multiplier_shape); caffe_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_cpu_data()); } diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index fc88433c42b..1cac8fc3387 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -9,62 +9,63 @@ namespace caffe { template void ConcatLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - concat_dim_ = this->layer_param_.concat_param().concat_dim(); - CHECK_GE(concat_dim_, 0) << - "concat_dim should be >= 0"; - CHECK_LE(concat_dim_, 1) << - "For now concat_dim <=1, it can only concat num and channels"; + const ConcatParameter& concat_param = this->layer_param_.concat_param(); + CHECK(!(concat_param.has_axis() && concat_param.has_concat_dim())) + << "Either axis or concat_dim should be specified; not both."; } template void ConcatLayer::Reshape(const vector*>& bottom, const vector*>& top) { + const int num_axes = bottom[0]->num_axes(); + const ConcatParameter& concat_param = this->layer_param_.concat_param(); + if (concat_param.has_concat_dim()) { + concat_axis_ = static_cast(concat_param.concat_dim()); + // Don't allow negative indexing for concat_dim, a uint32 -- almost + // certainly unintended. + CHECK_GE(concat_axis_, 0) << "casting concat_dim from uint32 to int32 " + << "produced negative result; concat_dim must satisfy " + << "0 <= concat_dim < " << kMaxBlobAxes; + CHECK_LT(concat_axis_, num_axes) << "concat_dim out of range."; + } else { + concat_axis_ = bottom[0]->CanonicalAxisIndex(concat_param.axis()); + } // Initialize with the first blob. 
- count_ = bottom[0]->count(); - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); + vector top_shape = bottom[0]->shape(); + num_concats_ = bottom[0]->count(0, concat_axis_); + concat_input_size_ = bottom[0]->count(concat_axis_ + 1); + int bottom_count_sum = bottom[0]->count(); for (int i = 1; i < bottom.size(); ++i) { - count_ += bottom[i]->count(); - if (concat_dim_== 0) { - num_ += bottom[i]->num(); - } else if (concat_dim_ == 1) { - channels_ += bottom[i]->channels(); - } else if (concat_dim_ == 2) { - height_ += bottom[i]->height(); - } else if (concat_dim_ == 3) { - width_ += bottom[i]->width(); + CHECK_EQ(num_axes, bottom[i]->num_axes()) + << "All inputs must have the same #axes."; + for (int j = 0; j < num_axes; ++j) { + if (j == concat_axis_) { continue; } + CHECK_EQ(top_shape[j], bottom[i]->shape(j)) + << "All inputs must have the same shape, except at concat_axis."; } + bottom_count_sum += bottom[i]->count(); + top_shape[concat_axis_] += bottom[i]->shape(concat_axis_); } - top[0]->Reshape(num_, channels_, height_, width_); - CHECK_EQ(count_, top[0]->count()); + top[0]->Reshape(top_shape); + CHECK_EQ(bottom_count_sum, top[0]->count()); } template void ConcatLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { Dtype* top_data = top[0]->mutable_cpu_data(); - if (concat_dim_== 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - int num_elem = bottom[i]->count(); - caffe_copy(num_elem, bottom_data, top_data+top[0]->offset(offset_num)); - offset_num += bottom[i]->num(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->cpu_data(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * 
concat_input_size_, + bottom_data + n * bottom_concat_axis * concat_input_size_, + top_data + (n * top_concat_axis + offset_concat_axis) + * concat_input_size_); } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - int num_elem = - bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), - top_data+top[0]->offset(n, offset_channel)); - } - offset_channel += bottom[i]->channels(); - } // concat_dim_ is guaranteed to be 0 or 1 by LayerSetUp. + offset_concat_axis += bottom_concat_axis; } } @@ -72,32 +73,19 @@ template void ConcatLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->cpu_diff(); - if (concat_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_cpu_diff(); - caffe_copy(blob->count(), top_diff + top[0]->offset(offset_num), - bottom_diff); - } - offset_num += blob->num(); - } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_cpu_diff(); - int num_elem = blob->channels()*blob->height()*blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + top[0]->offset(n, offset_channel), - bottom_diff + blob->offset(n)); - } - } - offset_channel += blob->channels(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + if (!propagate_down[i]) { continue; } + Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + 
caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + + (n * top_concat_axis + offset_concat_axis) * concat_input_size_, + bottom_diff + n * bottom_concat_axis * concat_input_size_); } - } // concat_dim_ is guaranteed to be 0 or 1 by LayerSetUp. + offset_concat_axis += bottom_concat_axis; + } } #ifdef CPU_ONLY diff --git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 88fc090025f..dbadb5aeb30 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -10,29 +10,18 @@ template void ConcatLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { Dtype* top_data = top[0]->mutable_gpu_data(); - if (concat_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->gpu_data(); - caffe_copy(bottom[i]->count(), bottom_data, - top_data + top[0]->offset(offset_num)); - offset_num += bottom[i]->num(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->gpu_data(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, + bottom_data + n * bottom_concat_axis * concat_input_size_, + top_data + (n * top_concat_axis + offset_concat_axis) + * concat_input_size_); } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->gpu_data(); - int num_elem = - bottom[i]->channels() * bottom[i]->height() * bottom[i]->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), - top_data + top[0]->offset(n, offset_channel)); - } - offset_channel += bottom[i]->channels(); - } - } else { - LOG(FATAL) << "concat_dim along dim" << concat_dim_ << - " not implemented yet"; + offset_concat_axis += bottom_concat_axis; } 
} @@ -40,34 +29,18 @@ template void ConcatLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->gpu_diff(); - if (concat_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_gpu_diff(); - caffe_copy(blob->count(), top_diff + top[0]->offset(offset_num), - bottom_diff); - } - offset_num += blob->num(); - } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_gpu_diff(); - int num_elem = blob->channels()*blob->height()*blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + top[0]->offset(n, offset_channel), - bottom_diff + blob->offset(n)); - } - } - offset_channel += blob->channels(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + if (!propagate_down[i]) { continue; } + Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + + (n * top_concat_axis + offset_concat_axis) * concat_input_size_, + bottom_diff + n * bottom_concat_axis * concat_input_size_); } - } else { - LOG(FATAL) << "concat_dim along dim" << concat_dim_ << - " not implemented yet"; + offset_concat_axis += bottom_concat_axis; } } diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index c0c9f6f3371..928ef5ee468 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -39,13 +39,6 @@ void ConvolutionLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* weight = this->blobs_[0]->cpu_data(); 
Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); - if (this->param_propagate_down_[0]) { - caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff); - } - if (this->bias_term_ && this->param_propagate_down_[1]) { - caffe_set(this->blobs_[1]->count(), Dtype(0), - this->blobs_[1]->mutable_cpu_diff()); - } for (int i = 0; i < top.size(); ++i) { const Dtype* top_diff = top[i]->cpu_diff(); const Dtype* bottom_data = bottom[i]->cpu_data(); diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index 3902fdf3930..b8a98ff7cc9 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -31,13 +31,6 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* weight = this->blobs_[0]->gpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); - if (this->param_propagate_down_[0]) { - caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff); - } - if (this->bias_term_ && this->param_propagate_down_[1]) { - caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), - this->blobs_[1]->mutable_gpu_diff()); - } for (int i = 0; i < top.size(); ++i) { const Dtype* top_diff = top[i]->gpu_diff(); // Bias gradient, if necessary. 
diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 071014e1b48..b5bfdb098e0 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -54,12 +54,10 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, if (this->param_propagate_down_[0]) { weight = this->blobs_[0]->gpu_data(); weight_diff = this->blobs_[0]->mutable_gpu_diff(); - caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff); } Dtype* bias_diff = NULL; if (this->bias_term_ && this->param_propagate_down_[1]) { bias_diff = this->blobs_[1]->mutable_gpu_diff(); - caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff); } for (int i = 0; i < top.size(); ++i) { const Dtype* top_diff = top[i]->gpu_diff(); diff --git a/src/caffe/layers/cudnn_softmax_layer.cpp b/src/caffe/layers/cudnn_softmax_layer.cpp index 83a5b69a626..211701cad49 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cpp +++ b/src/caffe/layers/cudnn_softmax_layer.cpp @@ -26,10 +26,10 @@ template void CuDNNSoftmaxLayer::Reshape(const vector*>& bottom, const vector*>& top) { SoftmaxLayer::Reshape(bottom, top); - int N = bottom[0]->num(); - int K = bottom[0]->channels(); - int H = bottom[0]->height(); - int W = bottom[0]->width(); + int N = this->outer_num_; + int K = bottom[0]->shape(this->softmax_axis_); + int H = this->inner_num_; + int W = 1; cudnn::setTensor4dDesc(&bottom_desc_, N, K, H, W); cudnn::setTensor4dDesc(&top_desc_, N, K, H, W); } diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 8877caf89c8..0f2d66776a9 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -69,9 +69,9 @@ void DataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->width(); // label if (this->output_labels_) { - top[1]->Reshape(this->layer_param_.data_param().batch_size(), 1, 1, 1); - this->prefetch_label_.Reshape(this->layer_param_.data_param().batch_size(), - 1, 1, 1); + vector label_shape(1, 
this->layer_param_.data_param().batch_size()); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); } } @@ -89,9 +89,17 @@ void DataLayer::InternalThreadEntry() { // Reshape on single input batches for inputs of varying dimension. const int batch_size = this->layer_param_.data_param().batch_size(); const int crop_size = this->layer_param_.transform_param().crop_size(); + bool force_color = this->layer_param_.data_param().force_encoded_color(); if (batch_size == 1 && crop_size == 0) { Datum datum; datum.ParseFromString(cursor_->value()); + if (datum.encoded()) { + if (force_color) { + DecodeDatum(&datum, true); + } else { + DecodeDatumNative(&datum); + } + } this->prefetch_data_.Reshape(1, datum.channels(), datum.height(), datum.width()); this->transformed_data_.Reshape(1, datum.channels(), @@ -104,7 +112,6 @@ void DataLayer::InternalThreadEntry() { if (this->output_labels_) { top_label = this->prefetch_label_.mutable_cpu_data(); } - bool force_color = this->layer_param_.data_param().force_encoded_color(); for (int item_id = 0; item_id < batch_size; ++item_id) { timer.Start(); // get a blob diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index e6d65ab526b..a4612963b6b 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -39,13 +39,6 @@ void DeconvolutionLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* weight = this->blobs_[0]->cpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); - if (this->param_propagate_down_[0]) { - caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff); - } - if (this->bias_term_ && this->param_propagate_down_[1]) { - caffe_set(this->blobs_[1]->count(), Dtype(0), - this->blobs_[1]->mutable_cpu_diff()); - } for (int i = 0; i < top.size(); ++i) { const Dtype* top_diff = top[i]->cpu_diff(); const Dtype* bottom_data = bottom[i]->cpu_data(); diff --git 
a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu index 9198dd64c72..39bc4de8c66 100644 --- a/src/caffe/layers/deconv_layer.cu +++ b/src/caffe/layers/deconv_layer.cu @@ -31,13 +31,6 @@ void DeconvolutionLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* weight = this->blobs_[0]->gpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); - if (this->param_propagate_down_[0]) { - caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff); - } - if (this->bias_term_ && this->param_propagate_down_[1]) { - caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), - this->blobs_[1]->mutable_gpu_diff()); - } for (int i = 0; i < top.size(); ++i) { const Dtype* top_diff = top[i]->gpu_diff(); const Dtype* bottom_data = bottom[i]->gpu_data(); diff --git a/src/caffe/layers/dummy_data_layer.cpp b/src/caffe/layers/dummy_data_layer.cpp index d254eb1f961..6b0d617464c 100644 --- a/src/caffe/layers/dummy_data_layer.cpp +++ b/src/caffe/layers/dummy_data_layer.cpp @@ -16,18 +16,30 @@ void DummyDataLayer::LayerSetUp(const vector*>& bottom, num_data_filler == num_top) << "Number of data fillers must be 0, 1 or equal to the number of tops: " << num_top << "; you specified " << num_data_filler << " data fillers."; - CHECK(param.num_size() == 1 || param.num_size() == num_top) - << "Must specify either a single (1) 'num' or one for each top blob " - << "(" << num_top << "); you specified " << param.num_size() << "."; - CHECK(param.channels_size() == 1 || param.channels_size() == num_top) - << "Must specify either a single (1) 'channels' or one for each top blob " - << "(" << num_top << "); you specified " << param.channels_size() << "."; - CHECK(param.height_size() == 1 || param.height_size() == num_top) - << "Must specify either a single (1) 'height' or one for each top blob " - << "(" << num_top << "); you specified " << param.height_size() << "."; - CHECK(param.width_size() == 1 || param.width_size() == 
num_top) - << "Must specify either a single (1) 'width' or one for each top blob " - << "(" << num_top << "); you specified " << param.width_size() << "."; + + const bool legacy_dims = param.num_size() || param.channels_size() || + param.height_size() || param.width_size(); + if (legacy_dims) { + CHECK_EQ(0, param.shape_size()) + << "Both shape and legacy fields were specified"; + // Using deprecated 4D output dim specifiers. + CHECK(param.num_size() == 1 || param.num_size() == num_top) + << "Must specify 'num' once, or once per top blob " + << "(" << num_top << "); specified " << param.num_size() << "."; + CHECK(param.channels_size() == 1 || param.channels_size() == num_top) + << "Must specify 'channels' once, or once per top blob " + << "(" << num_top << "); specified " << param.channels_size() << "."; + CHECK(param.height_size() == 1 || param.height_size() == num_top) + << "Must specify 'height' once, or once per top blob " + << "(" << num_top << "); specified " << param.height_size() << "."; + CHECK(param.width_size() == 1 || param.width_size() == num_top) + << "Must specify 'width' once, or once per top blob " + << "(" << num_top << "); specified " << param.width_size() << "."; + } else { + CHECK(param.shape_size() == 1 || param.shape_size() == num_top) + << "Must specify 'shape' once, or once per top blob " + << "(" << num_top << "); specified " << param.shape_size() << "."; + } // refill_[i] tells Forward i whether or not to actually refill top Blob i. // If refill_[i] is false, Forward does nothing for Blob i. We use this to // avoid wastefully refilling "constant" Blobs in every forward pass. @@ -63,14 +75,19 @@ void DummyDataLayer::LayerSetUp(const vector*>& bottom, } } for (int i = 0; i < num_top; ++i) { - const int num = (param.num_size() == 1) ? param.num(0) : param.num(i); - const int channels = - (param.channels_size() == 1) ? param.channels(0) : param.channels(i); - const int height = - (param.height_size() == 1) ? 
param.height(0) : param.height(i); - const int width = - (param.width_size() == 1) ? param.width(0) : param.width(i); - top[i]->Reshape(num, channels, height, width); + if (legacy_dims) { + const int num = (param.num_size() == 1) ? param.num(0) : param.num(i); + const int channels = + (param.channels_size() == 1) ? param.channels(0) : param.channels(i); + const int height = + (param.height_size() == 1) ? param.height(0) : param.height(i); + const int width = + (param.width_size() == 1) ? param.width(0) : param.width(i); + top[i]->Reshape(num, channels, height, width); + } else { + const int shape_index = (param.shape_size() == 1) ? 0 : i; + top[i]->Reshape(param.shape(shape_index)); + } } // Run Forward once, with refill_ inverted, to fill the constant Blobs. this->Forward(bottom, top); diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp index bbc34449588..55d2d955c7c 100644 --- a/src/caffe/layers/eltwise_layer.cpp +++ b/src/caffe/layers/eltwise_layer.cpp @@ -10,18 +10,23 @@ namespace caffe { template void EltwiseLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - CHECK(this->layer_param().eltwise_param().coeff_size() == 0 - || this->layer_param().eltwise_param().coeff_size() == bottom.size()) << + op_ = this->layer_param_.eltwise_param().operation(); + coeff_blob_ = this->layer_param().eltwise_param().coeff_blob(); + if (coeff_blob_) { + CHECK_EQ(op_, EltwiseParameter_EltwiseOp_SUM) + << "coeff_blob option only implemented for the SUM operation"; + } + const int coeff_size = this->layer_param().eltwise_param().coeff_size(); + CHECK(coeff_size == 0 || (!coeff_blob_ && coeff_size == bottom.size()) + || (coeff_blob_ && coeff_size == bottom.size() - 1)) << "Eltwise Layer takes one coefficient per bottom blob."; - CHECK(!(this->layer_param().eltwise_param().operation() - == EltwiseParameter_EltwiseOp_PROD - && this->layer_param().eltwise_param().coeff_size())) << + CHECK(op_ == EltwiseParameter_EltwiseOp_SUM + || 
this->layer_param().eltwise_param().coeff_size() == 0) << "Eltwise layer only takes coefficients for summation."; - op_ = this->layer_param_.eltwise_param().operation(); // Blob-wise coefficients for the elementwise operation. - coeffs_ = vector(bottom.size(), 1); - if (this->layer_param().eltwise_param().coeff_size()) { - for (int i = 0; i < bottom.size(); ++i) { + coeffs_.resize(bottom.size(), 1); + if (coeff_size) { + for (int i = 0; i < bottom.size() - coeff_blob_; ++i) { coeffs_[i] = this->layer_param().eltwise_param().coeff(i); } } @@ -31,21 +36,26 @@ void EltwiseLayer::LayerSetUp(const vector*>& bottom, template void EltwiseLayer::Reshape(const vector*>& bottom, const vector*>& top) { - const int num = bottom[0]->num(); - const int channels = bottom[0]->channels(); - const int height = bottom[0]->height(); - const int width = bottom[0]->width(); for (int i = 1; i < bottom.size(); ++i) { - CHECK_EQ(num, bottom[i]->num()); - CHECK_EQ(channels, bottom[i]->channels()); - CHECK_EQ(height, bottom[i]->height()); - CHECK_EQ(width, bottom[i]->width()); + if (coeff_blob_ && i == bottom.size() - 1) { + CHECK_EQ(i, bottom[i]->shape(0)) + << "Dimension of coeff blob axis 0 must equal the number of bottom " + << "blobs (not including the coeff blob itself)."; + for (int input_axis = 0, coeff_axis = 1; + coeff_axis < bottom[i]->num_axes(); ++input_axis, ++coeff_axis) { + CHECK_EQ(bottom[0]->shape(input_axis), bottom[i]->shape(coeff_axis)) + << "Each axis i >= 1 of the coeff blob must match the (i-1)th " + << "axis of the input."; + } + } else { + CHECK(bottom[i]->shape() == bottom[0]->shape()); + } } - top[0]->Reshape(num, channels, height, width); + top[0]->ReshapeLike(*bottom[0]); // If max operation, we will initialize the vector index part. 
if (this->layer_param_.eltwise_param().operation() == EltwiseParameter_EltwiseOp_MAX && top.size() == 1) { - max_idx_.Reshape(bottom[0]->num(), channels, height, width); + max_idx_.Reshape(bottom[0]->shape()); } } @@ -67,8 +77,21 @@ void EltwiseLayer::Forward_cpu( case EltwiseParameter_EltwiseOp_SUM: caffe_set(count, Dtype(0), top_data); // TODO(shelhamer) does BLAS optimize to sum for coeff = 1? - for (int i = 0; i < bottom.size(); ++i) { - caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data); + for (int i = 0; i < bottom.size() - coeff_blob_; ++i) { + if (coeff_blob_) { + const int num = bottom[bottom.size() - 1]->count() / + (bottom.size() - 1); + const int dim = bottom[i]->count() / num; + const Dtype* bottom_data = bottom[i]->cpu_data(); + const Dtype* coeff_data = bottom[bottom.size() - 1]->cpu_data(); + for (int j = 0; j < num; ++j, bottom_data += dim, top_data += dim) { + const Dtype coeff = coeffs_[i] * coeff_data[i * num + j]; + caffe_axpy(dim, coeff, bottom_data, top_data); + } + top_data = top[0]->mutable_cpu_data(); + } else { + caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data); + } } break; case EltwiseParameter_EltwiseOp_MAX: @@ -111,7 +134,7 @@ void EltwiseLayer::Backward_cpu(const vector*>& top, const int count = top[0]->count(); const Dtype* top_data = top[0]->cpu_data(); const Dtype* top_diff = top[0]->cpu_diff(); - for (int i = 0; i < bottom.size(); ++i) { + for (int i = 0; i < bottom.size() - coeff_blob_; ++i) { if (propagate_down[i]) { const Dtype* bottom_data = bottom[i]->cpu_data(); Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); @@ -135,7 +158,16 @@ void EltwiseLayer::Backward_cpu(const vector*>& top, caffe_mul(count, bottom_diff, top_diff, bottom_diff); break; case EltwiseParameter_EltwiseOp_SUM: - if (coeffs_[i] == Dtype(1)) { + if (coeff_blob_) { + const int num = bottom[bottom.size() - 1]->count() / + (bottom.size() - 1); + const int dim = bottom[i]->count() / num; + const Dtype* coeff_data = 
bottom[bottom.size() - 1]->cpu_data(); + for (int j = 0; j < num; ++j, bottom_diff += dim, top_diff += dim) { + const Dtype coeff = coeffs_[i] * coeff_data[i * num + j]; + caffe_cpu_scale(dim, coeff, top_diff, bottom_diff); + } + } else if (coeffs_[i] == Dtype(1.)) { caffe_copy(count, top_diff, bottom_diff); } else { caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff); diff --git a/src/caffe/layers/eltwise_layer.cu b/src/caffe/layers/eltwise_layer.cu index 2247870d97f..97f52079108 100644 --- a/src/caffe/layers/eltwise_layer.cu +++ b/src/caffe/layers/eltwise_layer.cu @@ -31,12 +31,33 @@ __global__ void MaxForward(const int nthreads, const Dtype* bottom_data_a, } } +template +__global__ void CoeffSum(const int count, const int dim, + const int num_offset, const Dtype coeff, const Dtype* coeff_data, + const bool backward, const Dtype* in, Dtype* out) { + CUDA_KERNEL_LOOP(index, count) { + const int n = num_offset + index / dim; + const Dtype other_coeff = coeff_data ? coeff_data[n] : Dtype(1); + const Dtype final_coeff = coeff * other_coeff; + const Dtype result = in[index] * final_coeff; + if (num_offset == 0 || backward) { + out[index] = result; + } else { + out[index] += result; + } + } +} + template void EltwiseLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { int* mask = NULL; const int count = top[0]->count(); + const int num = top[0]->num(); + const int dim = count / num; Dtype* top_data = top[0]->mutable_gpu_data(); + const Dtype* coeff_data = NULL; + const bool kBackward = false; switch (op_) { case EltwiseParameter_EltwiseOp_PROD: caffe_gpu_mul(count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), @@ -46,10 +67,17 @@ void EltwiseLayer::Forward_gpu(const vector*>& bottom, } break; case EltwiseParameter_EltwiseOp_SUM: - caffe_gpu_set(count, Dtype(0.), top_data); // TODO(shelhamer) does cuBLAS optimize to sum for coeff = 1? 
- for (int i = 0; i < bottom.size(); ++i) { - caffe_gpu_axpy(count, coeffs_[i], bottom[i]->gpu_data(), top_data); + if (coeff_blob_) { + coeff_data = bottom[bottom.size() - 1]->gpu_data(); + } + for (int i = 0; i < bottom.size() - coeff_blob_; ++i) { + const Dtype* bottom_data = bottom[i]->gpu_data(); + CoeffSum // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, dim, i * num, coeffs_[i], coeff_data, + kBackward, bottom_data, top_data); + CUDA_POST_KERNEL_CHECK; } break; case EltwiseParameter_EltwiseOp_MAX: @@ -85,11 +113,18 @@ void EltwiseLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const int* mask = NULL; const int count = top[0]->count(); + const int num = top[0]->num(); + const int dim = count / num; const Dtype* top_data = top[0]->gpu_data(); - const Dtype* top_diff = top[0]->gpu_diff(); - for (int i = 0; i < bottom.size(); ++i) { + const Dtype* coeff_data = NULL; + if (coeff_blob_) { + coeff_data = bottom[bottom.size() - 1]->gpu_data(); + } + const bool kBackward = true; + for (int i = 0; i < bottom.size() - coeff_blob_; ++i) { if (propagate_down[i]) { const Dtype* bottom_data = bottom[i]->gpu_data(); + const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); switch (op_) { case EltwiseParameter_EltwiseOp_PROD: @@ -111,11 +146,11 @@ void EltwiseLayer::Backward_gpu(const vector*>& top, caffe_gpu_mul(count, bottom_diff, top_diff, bottom_diff); break; case EltwiseParameter_EltwiseOp_SUM: - if (coeffs_[i] == Dtype(1.)) { - caffe_copy(count, top_diff, bottom_diff); - } else { - caffe_gpu_scale(count, coeffs_[i], top_diff, bottom_diff); - } + CoeffSum // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, dim, i * num, coeffs_[i], coeff_data, + kBackward, top_diff, bottom_diff); + CUDA_POST_KERNEL_CHECK; break; case EltwiseParameter_EltwiseOp_MAX: mask = max_idx_.gpu_data(); diff --git a/src/caffe/layers/embed_layer.cpp b/src/caffe/layers/embed_layer.cpp new file 
mode 100644 index 00000000000..be6b2cd2727 --- /dev/null +++ b/src/caffe/layers/embed_layer.cpp @@ -0,0 +1,122 @@ +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void EmbedLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + N_ = this->layer_param_.embed_param().num_output(); + CHECK_GT(N_, 0) << "EmbedLayer num_output must be positive."; + K_ = this->layer_param_.embed_param().input_dim(); + CHECK_GT(K_, 0) << "EmbedLayer input_dim must be positive."; + bias_term_ = this->layer_param_.embed_param().bias_term(); + // Check if we need to set up the weights + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + if (bias_term_) { + this->blobs_.resize(2); + } else { + this->blobs_.resize(1); + } + // Initialize the weights -- + // transposed from InnerProductLayer for spatial locality. 
+ vector weight_shape(2); + weight_shape[0] = K_; + weight_shape[1] = N_; + this->blobs_[0].reset(new Blob(weight_shape)); + // fill the weights + shared_ptr > weight_filler(GetFiller( + this->layer_param_.embed_param().weight_filler())); + weight_filler->Fill(this->blobs_[0].get()); + // If necessary, initialize and fill the bias term + if (bias_term_) { + vector bias_shape(1, N_); + this->blobs_[1].reset(new Blob(bias_shape)); + shared_ptr > bias_filler(GetFiller( + this->layer_param_.embed_param().bias_filler())); + bias_filler->Fill(this->blobs_[1].get()); + } + } // parameter initialization + this->param_propagate_down_.resize(this->blobs_.size(), true); +} + +template +void EmbedLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + // Figure out the dimensions + M_ = bottom[0]->count(); + vector top_shape = bottom[0]->shape(); + top_shape.push_back(N_); + top[0]->Reshape(top_shape); + // Set up the bias multiplier + if (bias_term_) { + vector bias_shape(1, M_); + bias_multiplier_.Reshape(bias_shape); + caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data()); + } +} + +template +void EmbedLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* weight = this->blobs_[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + int index; + for (int n = 0; n < M_; ++n) { + index = static_cast(bottom_data[n]); + DCHECK_GE(index, 0); + DCHECK_LT(index, K_); + DCHECK_EQ(static_cast(index), bottom_data[n]) << "non-integer input"; + caffe_copy(N_, weight + index * N_, top_data + n * N_); + } + if (bias_term_) { + const Dtype* bias = this->blobs_[1]->cpu_data(); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, Dtype(1), + bias_multiplier_.cpu_data(), bias, Dtype(1), top_data); + } +} + +template +void EmbedLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + CHECK(!propagate_down[0]) << "Can't backpropagate to 
EmbedLayer input."; + if (this->param_propagate_down_[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + const Dtype* bottom_data = bottom[0]->cpu_data(); + // Gradient with respect to weight + Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); + int index; + for (int n = 0; n < M_; ++n) { + index = static_cast(bottom_data[n]); + DCHECK_GE(index, 0); + DCHECK_LT(index, K_); + DCHECK_EQ(static_cast(index), bottom_data[n]) + << "non-integer input"; + caffe_axpy(N_, Dtype(1), top_diff + n * N_, weight_diff + index * N_); + } + } + if (bias_term_ && this->param_propagate_down_[1]) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); + caffe_cpu_gemv(CblasTrans, M_, N_, Dtype(1), top_diff, + bias_multiplier_.cpu_data(), Dtype(1), bias_diff); + } +} + +#ifdef CPU_ONLY +STUB_GPU(EmbedLayer); +#endif + +INSTANTIATE_CLASS(EmbedLayer); +REGISTER_LAYER_CLASS(Embed); + +} // namespace caffe diff --git a/src/caffe/layers/embed_layer.cu b/src/caffe/layers/embed_layer.cu new file mode 100644 index 00000000000..672fb9c608c --- /dev/null +++ b/src/caffe/layers/embed_layer.cu @@ -0,0 +1,85 @@ +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/gpu_util.cuh" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +__global__ void EmbedForward(const int nthreads, const Dtype* bottom_data, + const Dtype* weight, const int M, const int N, const int K, + Dtype* top_data) { + CUDA_KERNEL_LOOP(top_index, nthreads) { + const int n = top_index / N; + const int d = top_index % N; + const int index = static_cast(bottom_data[n]); + const int weight_index = index * N + d; + top_data[top_index] = weight[weight_index]; + } +} + +template +__global__ void EmbedBackward(const int nthreads, const Dtype* bottom_data, + const Dtype* top_diff, const int M, const int N, const int K, + Dtype* 
weight_diff); + +template +__global__ void EmbedBackward(const int nthreads, const Dtype* bottom_data, + const Dtype* top_diff, const int M, const int N, const int K, + Dtype* weight_diff) { + CUDA_KERNEL_LOOP(top_index, nthreads) { + const int n = top_index / N; + const int d = top_index % N; + const int index = static_cast(bottom_data[n]); + const int weight_index = index * N + d; + caffe_gpu_atomic_add(top_diff[top_index], weight_diff + weight_index); + } +} + +template +void EmbedLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + const int count = top[0]->count(); + EmbedForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, bottom_data, weight, M_, N_, K_, top_data); + if (bias_term_) { + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, Dtype(1), + bias_multiplier_.gpu_data(), + this->blobs_[1]->gpu_data(), Dtype(1), top_data); + } +} + +template +void EmbedLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + CHECK(!propagate_down[0]) << "Can't backpropagate to EmbedLayer input."; + if (this->param_propagate_down_[0]) { + const int top_count = top[0]->count(); + const int count = this->blobs_[0]->count(); + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); + EmbedBackward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + top_count, bottom_data, top_diff, M_, N_, K_, weight_diff); + } + if (bias_term_ && this->param_propagate_down_[1]) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); + caffe_gpu_gemv(CblasTrans, M_, N_, Dtype(1), top_diff, + bias_multiplier_.gpu_data(), Dtype(1), bias_diff); + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(EmbedLayer); + +} // namespace caffe 
diff --git a/src/caffe/layers/euclidean_loss_layer.cpp b/src/caffe/layers/euclidean_loss_layer.cpp index b539d3487f5..80efa31b22c 100644 --- a/src/caffe/layers/euclidean_loss_layer.cpp +++ b/src/caffe/layers/euclidean_loss_layer.cpp @@ -11,11 +11,9 @@ template void EuclideanLossLayer::Reshape( const vector*>& bottom, const vector*>& top) { LossLayer::Reshape(bottom, top); - CHECK_EQ(bottom[0]->channels(), bottom[1]->channels()); - CHECK_EQ(bottom[0]->height(), bottom[1]->height()); - CHECK_EQ(bottom[0]->width(), bottom[1]->width()); - diff_.Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); + CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) + << "Inputs must have the same dimension."; + diff_.ReshapeLike(*bottom[0]); } template diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp index eb7b42bc10b..3a078fabb81 100644 --- a/src/caffe/layers/flatten_layer.cpp +++ b/src/caffe/layers/flatten_layer.cpp @@ -9,30 +9,15 @@ namespace caffe { template void FlattenLayer::Reshape(const vector*>& bottom, const vector*>& top) { - int channels_out = bottom[0]->channels() * bottom[0]->height() - * bottom[0]->width(); - top[0]->Reshape(bottom[0]->num(), channels_out, 1, 1); - count_ = bottom[0]->num() * channels_out; - CHECK_EQ(count_, bottom[0]->count()); - CHECK_EQ(count_, top[0]->count()); -} - -template -void FlattenLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { + vector top_shape(2); + top_shape[0] = bottom[0]->num(); + top_shape[1] = bottom[0]->count() / bottom[0]->num(); + top[0]->Reshape(top_shape); + CHECK_EQ(top[0]->count(), bottom[0]->count()); top[0]->ShareData(*bottom[0]); + top[0]->ShareDiff(*bottom[0]); } -template -void FlattenLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -#ifdef CPU_ONLY -STUB_GPU(FlattenLayer); -#endif - INSTANTIATE_CLASS(FlattenLayer); 
REGISTER_LAYER_CLASS(Flatten); diff --git a/src/caffe/layers/flatten_layer.cu b/src/caffe/layers/flatten_layer.cu deleted file mode 100644 index 42abdad4499..00000000000 --- a/src/caffe/layers/flatten_layer.cu +++ /dev/null @@ -1,23 +0,0 @@ -#include - -#include "caffe/layer.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" - -namespace caffe { - -template -void FlattenLayer::Forward_gpu(const vector*>& bottom, - const vector*>& top) { - top[0]->ShareData(*bottom[0]); -} - -template -void FlattenLayer::Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -INSTANTIATE_LAYER_GPU_FUNCS(FlattenLayer); - -} // namespace caffe diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 3d856ec3001..1ceb6c24431 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -36,7 +36,7 @@ void HDF5DataLayer::LoadHDF5FileData(const char* filename) { hdf_blobs_.resize(top_size); const int MIN_DATA_DIM = 1; - const int MAX_DATA_DIM = 4; + const int MAX_DATA_DIM = INT_MAX; for (int i = 0; i < top_size; ++i) { hdf_blobs_[i] = shared_ptr >(new Blob()); @@ -88,9 +88,14 @@ void HDF5DataLayer::LayerSetUp(const vector*>& bottom, // Reshape blobs. 
const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); const int top_size = this->layer_param_.top_size(); + vector top_shape; for (int i = 0; i < top_size; ++i) { - top[i]->Reshape(batch_size, hdf_blobs_[i]->channels(), - hdf_blobs_[i]->height(), hdf_blobs_[i]->width()); + top_shape.resize(hdf_blobs_[i]->num_axes()); + top_shape[0] = batch_size; + for (int j = 1; j < top_shape.size(); ++j) { + top_shape[j] = hdf_blobs_[i]->shape(j); + } + top[i]->Reshape(top_shape); } } diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index 112226116c8..1c802714e33 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -50,6 +50,8 @@ void Im2colLayer::LayerSetUp(const vector*>& bottom, template void Im2colLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index f9046e1b3a1..38ebbd5ec14 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -81,8 +81,9 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label - top[1]->Reshape(batch_size, 1, 1, 1); - this->prefetch_label_.Reshape(batch_size, 1, 1, 1); + vector label_shape(1, batch_size); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); } template diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index b1ec6cb25c0..83c3235eb71 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -15,7 +15,12 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, const int num_output = 
this->layer_param_.inner_product_param().num_output(); bias_term_ = this->layer_param_.inner_product_param().bias_term(); N_ = num_output; - K_ = bottom[0]->count() / bottom[0]->num(); + const int axis = bottom[0]->CanonicalAxisIndex( + this->layer_param_.inner_product_param().axis()); + // Dimensions starting from "axis" are "flattened" into a single + // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W), + // and axis == 1, N inner products with dimension CHW are performed. + K_ = bottom[0]->count(axis); // Check if we need to set up the weights if (this->blobs_.size() > 0) { LOG(INFO) << "Skipping parameter initialization"; @@ -26,14 +31,18 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, this->blobs_.resize(1); } // Intialize the weight - this->blobs_[0].reset(new Blob(1, 1, N_, K_)); + vector weight_shape(2); + weight_shape[0] = N_; + weight_shape[1] = K_; + this->blobs_[0].reset(new Blob(weight_shape)); // fill the weights shared_ptr > weight_filler(GetFiller( this->layer_param_.inner_product_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); // If necessary, intiialize and fill the bias term if (bias_term_) { - this->blobs_[1].reset(new Blob(1, 1, 1, N_)); + vector bias_shape(1, N_); + this->blobs_[1].reset(new Blob(bias_shape)); shared_ptr > bias_filler(GetFiller( this->layer_param_.inner_product_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); @@ -46,13 +55,24 @@ template void InnerProductLayer::Reshape(const vector*>& bottom, const vector*>& top) { // Figure out the dimensions - M_ = bottom[0]->num(); - CHECK_EQ(bottom[0]->count() / bottom[0]->num(), K_) << "Input size " - "incompatible with inner product parameters."; - top[0]->Reshape(bottom[0]->num(), N_, 1, 1); + const int axis = bottom[0]->CanonicalAxisIndex( + this->layer_param_.inner_product_param().axis()); + const int new_K = bottom[0]->count(axis); + CHECK_EQ(K_, new_K) + << "Input size incompatible with inner product 
parameters."; + // The first "axis" dimensions are independent inner products; the total + // number of these is M_, the product over these dimensions. + M_ = bottom[0]->count(0, axis); + // The top shape will be the bottom shape with the flattened axes dropped, + // and replaced by a single axis with dimension num_output (N_). + vector top_shape = bottom[0]->shape(); + top_shape.resize(axis + 1); + top_shape[axis] = N_; + top[0]->Reshape(top_shape); // Set up the bias multiplier if (bias_term_) { - bias_multiplier_.Reshape(1, 1, 1, M_); + vector bias_shape(1, M_); + bias_multiplier_.Reshape(bias_shape); caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data()); } } @@ -81,13 +101,13 @@ void InnerProductLayer::Backward_cpu(const vector*>& top, const Dtype* bottom_data = bottom[0]->cpu_data(); // Gradient with respect to weight caffe_cpu_gemm(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1., - top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff()); + top_diff, bottom_data, (Dtype)1., this->blobs_[0]->mutable_cpu_diff()); } if (bias_term_ && this->param_propagate_down_[1]) { const Dtype* top_diff = top[0]->cpu_diff(); // Gradient with respect to bias caffe_cpu_gemv(CblasTrans, M_, N_, (Dtype)1., top_diff, - bias_multiplier_.cpu_data(), (Dtype)0., + bias_multiplier_.cpu_data(), (Dtype)1., this->blobs_[1]->mutable_cpu_diff()); } if (propagate_down[0]) { diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu index a9e1784a205..dd90cac12a8 100644 --- a/src/caffe/layers/inner_product_layer.cu +++ b/src/caffe/layers/inner_product_layer.cu @@ -33,13 +33,13 @@ void InnerProductLayer::Backward_gpu(const vector*>& top, const Dtype* bottom_data = bottom[0]->gpu_data(); // Gradient with respect to weight caffe_gpu_gemm(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1., - top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff()); + top_diff, bottom_data, (Dtype)1., this->blobs_[0]->mutable_gpu_diff()); } if 
(bias_term_ && this->param_propagate_down_[1]) { const Dtype* top_diff = top[0]->gpu_diff(); // Gradient with respect to bias caffe_gpu_gemv(CblasTrans, M_, N_, (Dtype)1., top_diff, - bias_multiplier_.gpu_data(), (Dtype)0., + bias_multiplier_.gpu_data(), (Dtype)1., this->blobs_[1]->mutable_gpu_diff()); } if (propagate_down[0]) { diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp index a5b6d11b065..3496a5c2a8a 100644 --- a/src/caffe/layers/loss_layer.cpp +++ b/src/caffe/layers/loss_layer.cpp @@ -24,7 +24,8 @@ void LossLayer::Reshape( const vector*>& bottom, const vector*>& top) { CHECK_EQ(bottom[0]->num(), bottom[1]->num()) << "The data and label should have the same number."; - top[0]->Reshape(1, 1, 1, 1); + vector loss_shape(0); // Loss layers output a scalar; 0 axes. + top[0]->Reshape(loss_shape); } INSTANTIATE_CLASS(LossLayer); diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index 5e3e7c429ef..36c1ace4c99 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -69,6 +69,8 @@ void LRNLayer::LayerSetUp(const vector*>& bottom, template void LRNLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; num_ = bottom[0]->num(); channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); diff --git a/src/caffe/layers/lstm_layer.cpp b/src/caffe/layers/lstm_layer.cpp new file mode 100644 index 00000000000..91543f73f71 --- /dev/null +++ b/src/caffe/layers/lstm_layer.cpp @@ -0,0 +1,221 @@ +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/sequence_layers.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void LSTMLayer::RecurrentInputBlobNames(vector* names) const { + names->resize(2); + (*names)[0] = "h_0"; + (*names)[1] = "c_0"; 
+} + +template +void LSTMLayer::RecurrentOutputBlobNames(vector* names) const { + names->resize(2); + (*names)[0] = "h_" + this->int_to_str(this->T_); + (*names)[1] = "c_T"; +} + +template +void LSTMLayer::OutputBlobNames(vector* names) const { + names->resize(1); + (*names)[0] = "h"; +} + +template +void LSTMLayer::FillUnrolledNet(NetParameter* net_param) const { + const int num_output = this->layer_param_.recurrent_param().num_output(); + CHECK_GT(num_output, 0) << "num_output must be positive"; + const FillerParameter& weight_filler = + this->layer_param_.recurrent_param().weight_filler(); + const FillerParameter& bias_filler = + this->layer_param_.recurrent_param().bias_filler(); + + // Add generic LayerParameter's (without bottoms/tops) of layer types we'll + // use to save redundant code. + LayerParameter hidden_param; + hidden_param.set_type("InnerProduct"); + hidden_param.mutable_inner_product_param()->set_num_output(num_output * 4); + hidden_param.mutable_inner_product_param()->set_bias_term(false); + hidden_param.mutable_inner_product_param()->set_axis(2); + hidden_param.mutable_inner_product_param()-> + mutable_weight_filler()->CopyFrom(weight_filler); + + LayerParameter biased_hidden_param(hidden_param); + biased_hidden_param.mutable_inner_product_param()->set_bias_term(true); + biased_hidden_param.mutable_inner_product_param()-> + mutable_bias_filler()->CopyFrom(bias_filler); + + LayerParameter sum_param; + sum_param.set_type("Eltwise"); + sum_param.mutable_eltwise_param()->set_operation( + EltwiseParameter_EltwiseOp_SUM); + + LayerParameter slice_param; + slice_param.set_type("Slice"); + slice_param.mutable_slice_param()->set_axis(0); + + LayerParameter split_param; + split_param.set_type("Split"); + + BlobShape input_shape; + input_shape.add_dim(1); // c_0 and h_0 are a single timestep + input_shape.add_dim(this->N_); + input_shape.add_dim(num_output); + + net_param->add_input("c_0"); + net_param->add_input_shape()->CopyFrom(input_shape); + + 
net_param->add_input("h_0"); + net_param->add_input_shape()->CopyFrom(input_shape); + + LayerParameter* cont_slice_param = net_param->add_layer(); + cont_slice_param->CopyFrom(slice_param); + cont_slice_param->set_name("cont_slice"); + cont_slice_param->add_bottom("cont"); + cont_slice_param->mutable_slice_param()->set_axis(1); + + // Add layer to transform all timesteps of x to the hidden state dimension. + // W_xc_x = W_xc * x + b_c + { + LayerParameter* x_transform_param = net_param->add_layer(); + x_transform_param->CopyFrom(biased_hidden_param); + x_transform_param->set_name("x_transform"); + x_transform_param->add_param()->set_name("W_xc"); + x_transform_param->add_param()->set_name("b_c"); + x_transform_param->add_bottom("x"); + x_transform_param->add_top("W_xc_x"); + } + + if (this->static_input_) { + // Add layer to transform x_static to the gate dimension. + // W_xc_x_static = W_xc_static * x_static + LayerParameter* x_static_transform_param = net_param->add_layer(); + x_static_transform_param->CopyFrom(hidden_param); + x_static_transform_param->mutable_inner_product_param()->set_axis(1); + x_static_transform_param->set_name("W_xc_x_static"); + x_static_transform_param->add_param()->set_name("W_xc_static"); + x_static_transform_param->add_bottom("x_static"); + x_static_transform_param->add_top("W_xc_x_static"); + + LayerParameter* reshape_param = net_param->add_layer(); + reshape_param->set_type("Reshape"); + BlobShape* new_shape = + reshape_param->mutable_reshape_param()->mutable_shape(); + new_shape->add_dim(1); // One timestep. 
+ new_shape->add_dim(this->N_); + new_shape->add_dim( + x_static_transform_param->inner_product_param().num_output()); + reshape_param->add_bottom("W_xc_x_static"); + reshape_param->add_top("W_xc_x_static"); + } + + LayerParameter* x_slice_param = net_param->add_layer(); + x_slice_param->CopyFrom(slice_param); + x_slice_param->add_bottom("W_xc_x"); + x_slice_param->set_name("W_xc_x_slice"); + + LayerParameter output_concat_layer; + output_concat_layer.set_name("h_concat"); + output_concat_layer.set_type("Concat"); + output_concat_layer.add_top("h"); + output_concat_layer.mutable_concat_param()->set_axis(0); + + for (int t = 1; t <= this->T_; ++t) { + string tm1s = this->int_to_str(t - 1); + string ts = this->int_to_str(t); + + cont_slice_param->add_top("cont_" + ts); + x_slice_param->add_top("W_xc_x_" + ts); + + // Add layers to flush the hidden state when beginning a new + // sequence, as indicated by cont_t. + // h_conted_{t-1} := cont_t * h_{t-1} + // + // Normally, cont_t is binary (i.e., 0 or 1), so: + // h_conted_{t-1} := h_{t-1} if cont_t == 1 + // 0 otherwise + { + LayerParameter* cont_h_param = net_param->add_layer(); + cont_h_param->CopyFrom(sum_param); + cont_h_param->mutable_eltwise_param()->set_coeff_blob(true); + cont_h_param->set_name("h_conted_" + tm1s); + cont_h_param->add_bottom("h_" + tm1s); + cont_h_param->add_bottom("cont_" + ts); + cont_h_param->add_top("h_conted_" + tm1s); + } + + // Add layer to compute + // W_hc_h_{t-1} := W_hc * h_conted_{t-1} + { + LayerParameter* w_param = net_param->add_layer(); + w_param->CopyFrom(hidden_param); + w_param->set_name("transform_" + ts); + w_param->add_param()->set_name("W_hc"); + w_param->add_bottom("h_conted_" + tm1s); + w_param->add_top("W_hc_h_" + tm1s); + w_param->mutable_inner_product_param()->set_axis(2); + } + + // Add the outputs of the linear transformations to compute the gate input. 
+ // gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c + // = W_hc_h_{t-1} + W_xc_x_t + b_c + { + LayerParameter* input_sum_layer = net_param->add_layer(); + input_sum_layer->CopyFrom(sum_param); + input_sum_layer->set_name("gate_input_" + ts); + input_sum_layer->add_bottom("W_hc_h_" + tm1s); + input_sum_layer->add_bottom("W_xc_x_" + ts); + if (this->static_input_) { + input_sum_layer->add_bottom("W_xc_x_static"); + } + input_sum_layer->add_top("gate_input_" + ts); + } + + // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t. + // Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t + // Outputs: c_t, h_t + // [ i_t' ] + // [ f_t' ] := gate_input_t + // [ o_t' ] + // [ g_t' ] + // i_t := \sigmoid[i_t'] + // f_t := \sigmoid[f_t'] + // o_t := \sigmoid[o_t'] + // g_t := \tanh[g_t'] + // c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t) + // h_t := o_t .* \tanh[c_t] + { + LayerParameter* lstm_unit_param = net_param->add_layer(); + lstm_unit_param->set_type("LSTMUnit"); + lstm_unit_param->add_bottom("c_" + tm1s); + lstm_unit_param->add_bottom("gate_input_" + ts); + lstm_unit_param->add_bottom("cont_" + ts); + lstm_unit_param->add_top("c_" + ts); + lstm_unit_param->add_top("h_" + ts); + lstm_unit_param->set_name("unit_" + ts); + } + output_concat_layer.add_bottom("h_" + ts); + } // for (int t = 1; t <= this->T_; ++t) + + { + LayerParameter* c_T_copy_param = net_param->add_layer(); + c_T_copy_param->CopyFrom(split_param); + c_T_copy_param->add_bottom("c_" + this->int_to_str(this->T_)); + c_T_copy_param->add_top("c_T"); + } + net_param->add_layer()->CopyFrom(output_concat_layer); +} + +INSTANTIATE_CLASS(LSTMLayer); +REGISTER_LAYER_CLASS(LSTM); + +} // namespace caffe diff --git a/src/caffe/layers/lstm_unit_layer.cpp b/src/caffe/layers/lstm_unit_layer.cpp new file mode 100644 index 00000000000..74078d264f5 --- /dev/null +++ b/src/caffe/layers/lstm_unit_layer.cpp @@ -0,0 +1,128 @@ +#include +#include +#include + +#include "caffe/layer.hpp" 
+#include "caffe/sequence_layers.hpp" + +namespace caffe { + +template +inline Dtype sigmoid(Dtype x) { + return 1. / (1. + exp(-x)); +} + +template +inline Dtype tanh(Dtype x) { + return 2. * sigmoid(2. * x) - 1.; +} + +template +void LSTMUnitLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + for (int i = 0; i < bottom.size(); ++i) { + CHECK_EQ(3, bottom[i]->num_axes()); + CHECK_EQ(1, bottom[i]->shape(0)); + } + const int num_instances = bottom[0]->shape(1); + hidden_dim_ = bottom[0]->shape(2); + CHECK_EQ(num_instances, bottom[1]->shape(1)); + CHECK_EQ(4 * hidden_dim_, bottom[1]->shape(2)); + CHECK_EQ(1, bottom[2]->shape(1)); + CHECK_EQ(num_instances, bottom[2]->shape(2)); + top[0]->ReshapeLike(*bottom[0]); + top[1]->ReshapeLike(*bottom[0]); + X_acts_.ReshapeLike(*bottom[1]); +} + +template +void LSTMUnitLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const int num = bottom[0]->shape(1); + const int x_dim = hidden_dim_ * 4; + const Dtype* C_prev = bottom[0]->cpu_data(); + const Dtype* X = bottom[1]->cpu_data(); + const Dtype* flush = bottom[2]->cpu_data(); + Dtype* C = top[0]->mutable_cpu_data(); + Dtype* H = top[1]->mutable_cpu_data(); + for (int n = 0; n < num; ++n) { + for (int d = 0; d < hidden_dim_; ++d) { + const Dtype i = sigmoid(X[d]); + const Dtype f = (*flush == 0) ? 
0 : + (*flush * sigmoid(X[1 * hidden_dim_ + d])); + const Dtype o = sigmoid(X[2 * hidden_dim_ + d]); + const Dtype g = tanh(X[3 * hidden_dim_ + d]); + const Dtype c_prev = C_prev[d]; + const Dtype c = f * c_prev + i * g; + C[d] = c; + const Dtype tanh_c = tanh(c); + H[d] = o * tanh_c; + } + C_prev += hidden_dim_; + X += x_dim; + C += hidden_dim_; + H += hidden_dim_; + ++flush; + } +} + +template +void LSTMUnitLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + CHECK(!propagate_down[2]) << "Cannot backpropagate to sequence indicators."; + if (!propagate_down[0] && !propagate_down[1]) { return; } + + const int num = bottom[0]->shape(1); + const int x_dim = hidden_dim_ * 4; + const Dtype* C_prev = bottom[0]->cpu_data(); + const Dtype* X = bottom[1]->cpu_data(); + const Dtype* flush = bottom[2]->cpu_data(); + const Dtype* C = top[0]->cpu_data(); + const Dtype* H = top[1]->cpu_data(); + const Dtype* C_diff = top[0]->cpu_diff(); + const Dtype* H_diff = top[1]->cpu_diff(); + Dtype* C_prev_diff = bottom[0]->mutable_cpu_diff(); + Dtype* X_diff = bottom[1]->mutable_cpu_diff(); + for (int n = 0; n < num; ++n) { + for (int d = 0; d < hidden_dim_; ++d) { + const Dtype i = sigmoid(X[d]); + const Dtype f = (*flush == 0) ? 
0 : + (*flush * sigmoid(X[1 * hidden_dim_ + d])); + const Dtype o = sigmoid(X[2 * hidden_dim_ + d]); + const Dtype g = tanh(X[3 * hidden_dim_ + d]); + const Dtype c_prev = C_prev[d]; + const Dtype c = C[d]; + const Dtype tanh_c = tanh(c); + Dtype* c_prev_diff = C_prev_diff + d; + Dtype* i_diff = X_diff + d; + Dtype* f_diff = X_diff + 1 * hidden_dim_ + d; + Dtype* o_diff = X_diff + 2 * hidden_dim_ + d; + Dtype* g_diff = X_diff + 3 * hidden_dim_ + d; + const Dtype c_term_diff = + C_diff[d] + H_diff[d] * o * (1 - tanh_c * tanh_c); + *c_prev_diff = c_term_diff * f; + *i_diff = c_term_diff * g * i * (1 - i); + *f_diff = c_term_diff * c_prev * f * (1 - f); + *o_diff = H_diff[d] * tanh_c * o * (1 - o); + *g_diff = c_term_diff * i * (1 - g * g); + } + C_prev += hidden_dim_; + X += x_dim; + C += hidden_dim_; + H += hidden_dim_; + C_diff += hidden_dim_; + H_diff += hidden_dim_; + X_diff += x_dim; + C_prev_diff += hidden_dim_; + ++flush; + } +} + +#ifdef CPU_ONLY +STUB_GPU(LSTMUnitLayer); +#endif + +INSTANTIATE_CLASS(LSTMUnitLayer); +REGISTER_LAYER_CLASS(LSTMUnit); + +} // namespace caffe diff --git a/src/caffe/layers/lstm_unit_layer.cu b/src/caffe/layers/lstm_unit_layer.cu new file mode 100644 index 00000000000..d6bf85071f5 --- /dev/null +++ b/src/caffe/layers/lstm_unit_layer.cu @@ -0,0 +1,154 @@ +#include +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/sequence_layers.hpp" + +namespace caffe { + +template +__device__ Dtype sigmoid(const Dtype x) { + return Dtype(1) / (Dtype(1) + exp(-x)); +} + +template +__device__ Dtype tanh(const Dtype x) { + return Dtype(2) * sigmoid(Dtype(2) * x) - Dtype(1); +} + +template +__global__ void LSTMActsForward(const int nthreads, const int dim, + const Dtype* X, Dtype* X_acts) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int x_dim = 4 * dim; + const int d = index % x_dim; + if (d < 3 * dim) { + X_acts[index] = sigmoid(X[index]); + } else { + X_acts[index] = tanh(X[index]); + } + } +} + +template +__global__ void 
LSTMUnitForward(const int nthreads, const int dim, + const Dtype* C_prev, const Dtype* X, const Dtype* flush, + Dtype* C, Dtype* H) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int n = index / dim; + const int d = index % dim; + const Dtype* X_offset = X + 4 * dim * n; + const Dtype i = X_offset[d]; + const Dtype f = X_offset[1 * dim + d]; + const Dtype o = X_offset[2 * dim + d]; + const Dtype g = X_offset[3 * dim + d]; + const Dtype c_prev = C_prev[index]; + const Dtype c = flush[n] * f * c_prev + i * g; + C[index] = c; + const Dtype tanh_c = tanh(c); + H[index] = o * tanh_c; + } +} + +template +void LSTMUnitLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const int count = top[1]->count(); + const Dtype* C_prev = bottom[0]->gpu_data(); + const Dtype* X = bottom[1]->gpu_data(); + const Dtype* flush = bottom[2]->gpu_data(); + Dtype* X_acts = X_acts_.mutable_gpu_data(); + Dtype* C = top[0]->mutable_gpu_data(); + Dtype* H = top[1]->mutable_gpu_data(); + const int X_count = bottom[1]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) + LSTMActsForward<<>>( + X_count, hidden_dim_, X, X_acts); + CUDA_POST_KERNEL_CHECK; + // NOLINT_NEXT_LINE(whitespace/operators) + LSTMUnitForward<<>>( + count, hidden_dim_, C_prev, X_acts, flush, C, H); + CUDA_POST_KERNEL_CHECK; +} + +template +__global__ void LSTMUnitBackward(const int nthreads, const int dim, + const Dtype* C_prev, const Dtype* X, const Dtype* C, const Dtype* H, + const Dtype* flush, const Dtype* C_diff, const Dtype* H_diff, + Dtype* C_prev_diff, Dtype* X_diff) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int n = index / dim; + const int d = index % dim; + const Dtype* X_offset = X + 4 * dim * n; + const Dtype i = X_offset[d]; + const Dtype f = X_offset[1 * dim + d]; + const Dtype o = X_offset[2 * dim + d]; + const Dtype g = X_offset[3 * dim + d]; + const Dtype c_prev = C_prev[index]; + const Dtype c = C[index]; + const Dtype tanh_c = tanh(c); + Dtype* c_prev_diff = C_prev_diff + index; + 
Dtype* X_diff_offset = X_diff + 4 * dim * n; + Dtype* i_diff = X_diff_offset + d; + Dtype* f_diff = X_diff_offset + 1 * dim + d; + Dtype* o_diff = X_diff_offset + 2 * dim + d; + Dtype* g_diff = X_diff_offset + 3 * dim + d; + const Dtype c_term_diff = + C_diff[index] + H_diff[index] * o * (1 - tanh_c * tanh_c); + const Dtype flush_n = flush[n]; + *c_prev_diff = flush_n * c_term_diff * f; + *i_diff = c_term_diff * g; + *f_diff = flush_n * c_term_diff * c_prev; + *o_diff = H_diff[index] * tanh_c; + *g_diff = c_term_diff * i; + } +} + +template +__global__ void LSTMActsBackward(const int nthreads, const int dim, + const Dtype* X_acts, const Dtype* X_acts_diff, Dtype* X_diff) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int x_dim = 4 * dim; + const int d = index % x_dim; + const Dtype X_act = X_acts[index]; + if (d < 3 * dim) { + X_diff[index] = X_acts_diff[index] * X_act * (Dtype(1) - X_act); + } else { + X_diff[index] = X_acts_diff[index] * (Dtype(1) - X_act * X_act); + } + } +} + +template +void LSTMUnitLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + CHECK(!propagate_down[2]) << "Cannot backpropagate to sequence indicators."; + if (!propagate_down[0] && !propagate_down[1]) { return; } + + const int count = top[1]->count(); + const Dtype* C_prev = bottom[0]->gpu_data(); + const Dtype* X_acts = X_acts_.gpu_data(); + const Dtype* flush = bottom[2]->gpu_data(); + const Dtype* C = top[0]->gpu_data(); + const Dtype* H = top[1]->gpu_data(); + const Dtype* C_diff = top[0]->gpu_diff(); + const Dtype* H_diff = top[1]->gpu_diff(); + Dtype* C_prev_diff = bottom[0]->mutable_gpu_diff(); + Dtype* X_acts_diff = X_acts_.mutable_gpu_diff(); + LSTMUnitBackward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>(count, hidden_dim_, + C_prev, X_acts, C, H, flush, C_diff, H_diff, C_prev_diff, X_acts_diff); + CUDA_POST_KERNEL_CHECK; + const int X_count = bottom[1]->count(); + Dtype* X_diff = bottom[1]->mutable_gpu_diff(); + 
LSTMActsBackward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + X_count, hidden_dim_, X_acts, X_acts_diff, X_diff); + CUDA_POST_KERNEL_CHECK; +} + +INSTANTIATE_LAYER_GPU_FUNCS(LSTMUnitLayer); + +} // namespace caffe diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp index effdad90aff..42de4198bc4 100644 --- a/src/caffe/layers/memory_data_layer.cpp +++ b/src/caffe/layers/memory_data_layer.cpp @@ -19,10 +19,11 @@ void MemoryDataLayer::DataLayerSetUp(const vector*>& bottom, CHECK_GT(batch_size_ * size_, 0) << "batch_size, channels, height, and width must be specified and" " positive in memory_data_param"; + vector label_shape(1, batch_size_); top[0]->Reshape(batch_size_, channels_, height_, width_); - top[1]->Reshape(batch_size_, 1, 1, 1); + top[1]->Reshape(label_shape); added_data_.Reshape(batch_size_, channels_, height_, width_); - added_label_.Reshape(batch_size_, 1, 1, 1); + added_label_.Reshape(label_shape); data_ = NULL; labels_ = NULL; added_data_.cpu_data(); diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index 6f4c69c861e..c8d41499455 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -81,6 +81,8 @@ void PoolingLayer::LayerSetUp(const vector*>& bottom, template void PoolingLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); diff --git a/src/caffe/layers/recurrent_layer.cpp b/src/caffe/layers/recurrent_layer.cpp new file mode 100644 index 00000000000..7dc38fec901 --- /dev/null +++ b/src/caffe/layers/recurrent_layer.cpp @@ -0,0 +1,222 @@ +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/sequence_layers.hpp" 
+#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +string RecurrentLayer::int_to_str(const int t) const { + ostringstream num; + num << t; + return num.str(); +} + +template +void RecurrentLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + CHECK_GE(bottom[0]->num_axes(), 2) + << "bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)"; + T_ = bottom[0]->shape(0); + N_ = bottom[0]->shape(1); + LOG(INFO) << "Initializing recurrent layer: assuming input batch contains " + << T_ << " timesteps of " << N_ << " independent streams."; + + CHECK_EQ(bottom[1]->num_axes(), 2) + << "bottom[1] must have exactly 2 axes -- (#timesteps, #streams)"; + CHECK_EQ(T_, bottom[1]->shape(0)); + CHECK_EQ(N_, bottom[1]->shape(1)); + + // If provided, bottom[2] is a static input to the recurrent net. + static_input_ = (bottom.size() > 2); + if (static_input_) { + CHECK_GE(bottom[2]->num_axes(), 1); + CHECK_EQ(N_, bottom[2]->shape(0)); + } + + // Create a NetParameter; setup the inputs that aren't unique to particular + // recurrent architectures. + NetParameter net_param; + net_param.set_force_backward(true); + + net_param.add_input("x"); + BlobShape input_shape; + for (int i = 0; i < bottom[0]->num_axes(); ++i) { + input_shape.add_dim(bottom[0]->shape(i)); + } + net_param.add_input_shape()->CopyFrom(input_shape); + + input_shape.Clear(); + input_shape.add_dim(1); + for (int i = 0; i < bottom[1]->num_axes(); ++i) { + input_shape.add_dim(bottom[1]->shape(i)); + } + net_param.add_input("cont"); + net_param.add_input_shape()->CopyFrom(input_shape); + + if (static_input_) { + input_shape.Clear(); + for (int i = 0; i < bottom[2]->num_axes(); ++i) { + input_shape.add_dim(bottom[2]->shape(i)); + } + net_param.add_input("x_static"); + net_param.add_input_shape()->CopyFrom(input_shape); + } + + // Call the child's FillUnrolledNet implementation to specify the unrolled + // recurrent architecture. 
+ this->FillUnrolledNet(&net_param); + + // Prepend this layer's name to the names of each layer in the unrolled net. + const string& layer_name = this->layer_param_.name(); + if (layer_name.size() > 0) { + for (int i = 0; i < net_param.layer_size(); ++i) { + LayerParameter* layer = net_param.mutable_layer(i); + layer->set_name(layer_name + "_" + layer->name()); + } + } + + // Create the unrolled net. + unrolled_net_.reset(new Net(net_param)); + unrolled_net_->set_debug_info( + this->layer_param_.recurrent_param().debug_info()); + + // Setup pointers to the inputs. + x_input_blob_ = CHECK_NOTNULL(unrolled_net_->blob_by_name("x").get()); + cont_input_blob_ = CHECK_NOTNULL(unrolled_net_->blob_by_name("cont").get()); + if (static_input_) { + x_static_input_blob_ = + CHECK_NOTNULL(unrolled_net_->blob_by_name("x_static").get()); + } + + // Setup pointers to paired recurrent inputs/outputs. + vector recur_input_names; + RecurrentInputBlobNames(&recur_input_names); + vector recur_output_names; + RecurrentOutputBlobNames(&recur_output_names); + const int num_recur_blobs = recur_input_names.size(); + CHECK_EQ(num_recur_blobs, recur_output_names.size()); + recur_input_blobs_.resize(num_recur_blobs); + recur_output_blobs_.resize(num_recur_blobs); + for (int i = 0; i < recur_input_names.size(); ++i) { + recur_input_blobs_[i] = + CHECK_NOTNULL(unrolled_net_->blob_by_name(recur_input_names[i]).get()); + recur_output_blobs_[i] = + CHECK_NOTNULL(unrolled_net_->blob_by_name(recur_output_names[i]).get()); + } + + // Setup pointers to outputs. 
+ vector output_names; + OutputBlobNames(&output_names); + CHECK_EQ(top.size(), output_names.size()) + << "OutputBlobNames must provide an output blob name for each top."; + output_blobs_.resize(output_names.size()); + for (int i = 0; i < output_names.size(); ++i) { + output_blobs_[i] = + CHECK_NOTNULL(unrolled_net_->blob_by_name(output_names[i]).get()); + } + + // We should have 2 inputs (x and cont), plus a number of recurrent inputs, + // plus maybe a static input. + CHECK_EQ(2 + num_recur_blobs + static_input_, + unrolled_net_->input_blobs().size()); + + // This layer's parameters are any parameters in the layers of the unrolled + // net. We only want one copy of each parameter, so check that the parameter + // is "owned" by the layer, rather than shared with another. + this->blobs_.clear(); + for (int i = 0; i < unrolled_net_->params().size(); ++i) { + if (unrolled_net_->param_owners()[i] == -1) { + LOG(INFO) << "Adding parameter " << i << ": " + << unrolled_net_->param_display_names()[i]; + this->blobs_.push_back(unrolled_net_->params()[i]); + } + } + // Check that param_propagate_down is set for all of the parameters in the + // unrolled net; set param_propagate_down to true in this layer. + for (int i = 0; i < unrolled_net_->layers().size(); ++i) { + for (int j = 0; j < unrolled_net_->layers()[i]->blobs().size(); ++j) { + CHECK(unrolled_net_->layers()[i]->param_propagate_down(j)) + << "param_propagate_down not set for layer " << i << ", param " << j; + } + } + this->param_propagate_down_.clear(); + this->param_propagate_down_.resize(this->blobs_.size(), true); + + // Set the diffs of recurrent outputs to 0 -- we can't backpropagate across + // batches. 
+ for (int i = 0; i < recur_output_blobs_.size(); ++i) { + caffe_set(recur_output_blobs_[i]->count(), Dtype(0), + recur_output_blobs_[i]->mutable_cpu_diff()); + } +} + +template +void RecurrentLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + CHECK_EQ(top.size(), output_blobs_.size()); + for (int i = 0; i < top.size(); ++i) { + top[i]->ReshapeLike(*output_blobs_[i]); + output_blobs_[i]->ShareData(*top[i]); + output_blobs_[i]->ShareDiff(*top[i]); + } + x_input_blob_->ShareData(*bottom[0]); + x_input_blob_->ShareDiff(*bottom[0]); + cont_input_blob_->ShareData(*bottom[1]); + if (static_input_) { + x_static_input_blob_->ShareData(*bottom[2]); + x_static_input_blob_->ShareDiff(*bottom[2]); + } +} + +template +void RecurrentLayer::Reset() { + // "Reset" the hidden state of the net by zeroing out all recurrent outputs. + for (int i = 0; i < recur_output_blobs_.size(); ++i) { + caffe_set(recur_output_blobs_[i]->count(), Dtype(0), + recur_output_blobs_[i]->mutable_cpu_data()); + } +} + +template +void RecurrentLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + // Hacky fix for test time... reshare all the shared blobs. + // TODO: somehow make this work non-hackily. 
+ if (this->phase_ == TEST) { + unrolled_net_->ShareWeightData(); + } + + DCHECK_EQ(recur_input_blobs_.size(), recur_output_blobs_.size()); + for (int i = 0; i < recur_input_blobs_.size(); ++i) { + const int count = recur_input_blobs_[i]->count(); + DCHECK_EQ(count, recur_output_blobs_[i]->count()); + const Dtype* timestep_T_data = recur_output_blobs_[i]->cpu_data(); + Dtype* timestep_0_data = recur_input_blobs_[i]->mutable_cpu_data(); + caffe_copy(count, timestep_T_data, timestep_0_data); + } + + unrolled_net_->ForwardPrefilled(); +} + +template +void RecurrentLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + CHECK(!propagate_down[1]) << "Cannot backpropagate to sequence indicators."; + if (!propagate_down[0] && !propagate_down[2]) { return; } + + unrolled_net_->Backward(); +} + +#ifdef CPU_ONLY +STUB_GPU_FORWARD(RecurrentLayer, Forward); +#endif + +INSTANTIATE_CLASS(RecurrentLayer); + +} // namespace caffe diff --git a/src/caffe/layers/recurrent_layer.cu b/src/caffe/layers/recurrent_layer.cu new file mode 100644 index 00000000000..ce4b2f9b77c --- /dev/null +++ b/src/caffe/layers/recurrent_layer.cu @@ -0,0 +1,35 @@ +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/sequence_layers.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void RecurrentLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + // Hacky fix for test time... reshare all the shared blobs. + // TODO: somehow make this work non-hackily. 
+ if (this->phase_ == TEST) { + unrolled_net_->ShareWeightData(); + } + + DCHECK_EQ(recur_input_blobs_.size(), recur_output_blobs_.size()); + for (int i = 0; i < recur_input_blobs_.size(); ++i) { + const int count = recur_input_blobs_[i]->count(); + DCHECK_EQ(count, recur_output_blobs_[i]->count()); + const Dtype* timestep_T_data = recur_output_blobs_[i]->gpu_data(); + Dtype* timestep_0_data = recur_input_blobs_[i]->mutable_gpu_data(); + caffe_copy(count, timestep_T_data, timestep_0_data); + } + + unrolled_net_->ForwardPrefilled(); +} + +INSTANTIATE_LAYER_GPU_FORWARD(RecurrentLayer); + +} // namespace caffe diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp new file mode 100644 index 00000000000..f8e5122b297 --- /dev/null +++ b/src/caffe/layers/reshape_layer.cpp @@ -0,0 +1,20 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" + +namespace caffe { + +template +void ReshapeLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + top[0]->Reshape(this->layer_param_.reshape_param().shape()); + CHECK_EQ(top[0]->count(), bottom[0]->count()); + top[0]->ShareData(*bottom[0]); + top[0]->ShareDiff(*bottom[0]); +} + +INSTANTIATE_CLASS(ReshapeLayer); +REGISTER_LAYER_CLASS(Reshape); + +} // namespace caffe diff --git a/src/caffe/layers/rnn_layer.cpp b/src/caffe/layers/rnn_layer.cpp new file mode 100644 index 00000000000..a2a22f62819 --- /dev/null +++ b/src/caffe/layers/rnn_layer.cpp @@ -0,0 +1,217 @@ +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/sequence_layers.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void RNNLayer::RecurrentInputBlobNames(vector* names) const { + names->resize(1); + (*names)[0] = "h_0"; +} + +template +void RNNLayer::RecurrentOutputBlobNames(vector* names) const { + names->resize(1); + (*names)[0] = "h_" + this->int_to_str(this->T_); +} + +template 
+void RNNLayer::OutputBlobNames(vector* names) const { + names->resize(1); + (*names)[0] = "o"; +} + +template +void RNNLayer::FillUnrolledNet(NetParameter* net_param) const { + const int num_output = this->layer_param_.recurrent_param().num_output(); + CHECK_GT(num_output, 0) << "num_output must be positive"; + const FillerParameter& weight_filler = + this->layer_param_.recurrent_param().weight_filler(); + const FillerParameter& bias_filler = + this->layer_param_.recurrent_param().bias_filler(); + + // Add generic LayerParameter's (without bottoms/tops) of layer types we'll + // use to save redundant code. + LayerParameter hidden_param; + hidden_param.set_type("InnerProduct"); + hidden_param.mutable_inner_product_param()->set_num_output(num_output); + hidden_param.mutable_inner_product_param()->set_bias_term(false); + hidden_param.mutable_inner_product_param()->set_axis(2); + hidden_param.mutable_inner_product_param()-> + mutable_weight_filler()->CopyFrom(weight_filler); + + LayerParameter biased_hidden_param(hidden_param); + biased_hidden_param.mutable_inner_product_param()->set_bias_term(true); + biased_hidden_param.mutable_inner_product_param()-> + mutable_bias_filler()->CopyFrom(bias_filler); + + LayerParameter sum_param; + sum_param.set_type("Eltwise"); + sum_param.mutable_eltwise_param()->set_operation( + EltwiseParameter_EltwiseOp_SUM); + + LayerParameter tanh_param; + tanh_param.set_type("TanH"); + + LayerParameter slice_param; + slice_param.set_type("Slice"); + slice_param.mutable_slice_param()->set_axis(0); + + BlobShape input_shape; + input_shape.add_dim(1); // h_0 is a single timestep + input_shape.add_dim(this->N_); + input_shape.add_dim(num_output); + net_param->add_input("h_0"); + net_param->add_input_shape()->CopyFrom(input_shape); + + LayerParameter* cont_slice_param = net_param->add_layer(); + cont_slice_param->CopyFrom(slice_param); + cont_slice_param->set_name("cont_slice"); + cont_slice_param->add_bottom("cont"); + 
cont_slice_param->mutable_slice_param()->set_axis(1); + + // Add layer to transform all timesteps of x to the hidden state dimension. + // W_xh_x = W_xh * x + b_h + { + LayerParameter* x_transform_param = net_param->add_layer(); + x_transform_param->CopyFrom(biased_hidden_param); + x_transform_param->set_name("x_transform"); + x_transform_param->add_param()->set_name("W_xh"); + x_transform_param->add_param()->set_name("b_h"); + x_transform_param->add_bottom("x"); + x_transform_param->add_top("W_xh_x"); + } + + if (this->static_input_) { + // Add layer to transform x_static to the hidden state dimension. + // W_xh_x_static = W_xh_static * x_static + LayerParameter* x_static_transform_param = net_param->add_layer(); + x_static_transform_param->CopyFrom(hidden_param); + x_static_transform_param->mutable_inner_product_param()->set_axis(1); + x_static_transform_param->set_name("W_xh_x_static"); + x_static_transform_param->add_param()->set_name("W_xh_static"); + x_static_transform_param->add_bottom("x_static"); + x_static_transform_param->add_top("W_xh_x_static"); + + LayerParameter* reshape_param = net_param->add_layer(); + reshape_param->set_type("Reshape"); + BlobShape* new_shape = + reshape_param->mutable_reshape_param()->mutable_shape(); + new_shape->add_dim(1); // One timestep. 
+ new_shape->add_dim(this->N_); + new_shape->add_dim( + x_static_transform_param->inner_product_param().num_output()); + reshape_param->set_name("W_xh_x_static_reshape"); + reshape_param->add_bottom("W_xh_x_static"); + reshape_param->add_top("W_xh_x_static"); + } + + LayerParameter* x_slice_param = net_param->add_layer(); + x_slice_param->CopyFrom(slice_param); + x_slice_param->set_name("W_xh_x_slice"); + x_slice_param->add_bottom("W_xh_x"); + + LayerParameter output_concat_layer; + output_concat_layer.set_name("o_concat"); + output_concat_layer.set_type("Concat"); + output_concat_layer.add_top("o"); + output_concat_layer.mutable_concat_param()->set_axis(0); + + for (int t = 1; t <= this->T_; ++t) { + string tm1s = this->int_to_str(t - 1); + string ts = this->int_to_str(t); + + cont_slice_param->add_top("cont_" + ts); + x_slice_param->add_top("W_xh_x_" + ts); + + // Add layer to flush the hidden state when beginning a new sequence, + // as indicated by cont_t. + // h_conted_{t-1} := cont_t * h_{t-1} + // + // Normally, cont_t is binary (i.e., 0 or 1), so: + // h_conted_{t-1} := h_{t-1} if cont_t == 1 + // 0 otherwise + { + LayerParameter* cont_h_param = net_param->add_layer(); + cont_h_param->CopyFrom(sum_param); + cont_h_param->mutable_eltwise_param()->set_coeff_blob(true); + cont_h_param->set_name("h_conted_" + tm1s); + cont_h_param->add_bottom("h_" + tm1s); + cont_h_param->add_bottom("cont_" + ts); + cont_h_param->add_top("h_conted_" + tm1s); + } + + // Add layer to compute + // W_hh_h_{t-1} := W_hh * h_conted_{t-1} + { + LayerParameter* w_param = net_param->add_layer(); + w_param->CopyFrom(hidden_param); + w_param->set_name("W_hh_h_" + tm1s); + w_param->add_param()->set_name("W_hh"); + w_param->add_bottom("h_conted_" + tm1s); + w_param->add_top("W_hh_h_" + tm1s); + w_param->mutable_inner_product_param()->set_axis(2); + } + + // Add layers to compute + // h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h ) + // = \tanh( W_hh_h_{t-1} + W_xh_t ) + { + 
LayerParameter* h_input_sum_param = net_param->add_layer(); + h_input_sum_param->CopyFrom(sum_param); + h_input_sum_param->set_name("h_input_sum_" + ts); + h_input_sum_param->add_bottom("W_hh_h_" + tm1s); + h_input_sum_param->add_bottom("W_xh_x_" + ts); + if (this->static_input_) { + h_input_sum_param->add_bottom("W_xh_x_static"); + } + h_input_sum_param->add_top("h_neuron_input_" + ts); + } + { + LayerParameter* h_neuron_param = net_param->add_layer(); + h_neuron_param->CopyFrom(tanh_param); + h_neuron_param->set_name("h_neuron_" + ts); + h_neuron_param->add_bottom("h_neuron_input_" + ts); + h_neuron_param->add_top("h_" + ts); + } + + // Add layer to compute + // W_ho_h_t := W_ho * h_t + b_o + { + LayerParameter* w_param = net_param->add_layer(); + w_param->CopyFrom(biased_hidden_param); + w_param->set_name("W_ho_h_" + ts); + w_param->add_param()->set_name("W_ho"); + w_param->add_param()->set_name("b_o"); + w_param->add_bottom("h_" + ts); + w_param->add_top("W_ho_h_" + ts); + w_param->mutable_inner_product_param()->set_axis(2); + } + + // Add layers to compute + // o_t := \tanh( W_ho h_t + b_o) + // = \tanh( W_ho_h_t ) + { + LayerParameter* o_neuron_param = net_param->add_layer(); + o_neuron_param->CopyFrom(tanh_param); + o_neuron_param->set_name("o_neuron_" + ts); + o_neuron_param->add_bottom("W_ho_h_" + ts); + o_neuron_param->add_top("o_" + ts); + } + output_concat_layer.add_bottom("o_" + ts); + } // for (int t = 1; t <= this->T_; ++t) + + net_param->add_layer()->CopyFrom(output_concat_layer); +} + +INSTANTIATE_CLASS(RNNLayer); +REGISTER_LAYER_CLASS(RNN); + +} // namespace caffe diff --git a/src/caffe/layers/slice_layer.cpp b/src/caffe/layers/slice_layer.cpp index 46c3acd6513..e4418c9cf9c 100644 --- a/src/caffe/layers/slice_layer.cpp +++ b/src/caffe/layers/slice_layer.cpp @@ -11,9 +11,8 @@ template void SliceLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { const SliceParameter& slice_param = this->layer_param_.slice_param(); - slice_dim_ = 
slice_param.slice_dim(); - CHECK_GE(slice_dim_, 0); - CHECK_LE(slice_dim_, 1) << "Can only slice num and channels"; + CHECK(!(slice_param.has_axis() && slice_param.has_slice_dim())) + << "Either axis or slice_dim should be specified; not both."; slice_point_.clear(); std::copy(slice_param.slice_point().begin(), slice_param.slice_point().end(), @@ -23,18 +22,27 @@ void SliceLayer::LayerSetUp(const vector*>& bottom, template void SliceLayer::Reshape(const vector*>& bottom, const vector*>& top) { - count_ = 0; - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); + const int num_axes = bottom[0]->num_axes(); + const SliceParameter& slice_param = this->layer_param_.slice_param(); + if (slice_param.has_slice_dim()) { + slice_axis_ = static_cast(slice_param.slice_dim()); + // Don't allow negative indexing for slice_dim, a uint32 -- almost + // certainly unintended. + CHECK_GE(slice_axis_, 0) << "casting slice_dim from uint32 to int32 " + << "produced negative result; slice_dim must satisfy " + << "0 <= slice_dim < " << kMaxBlobAxes; + CHECK_LT(slice_axis_, num_axes) << "slice_dim out of range."; + } else { + slice_axis_ = bottom[0]->CanonicalAxisIndex(slice_param.axis()); + } + vector top_shape = bottom[0]->shape(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + num_slices_ = bottom[0]->count(0, slice_axis_); + slice_size_ = bottom[0]->count(slice_axis_ + 1); + int count = 0; if (slice_point_.size() != 0) { CHECK_EQ(slice_point_.size(), top.size() - 1); - if (slice_dim_ == 0) { - CHECK_LE(top.size(), num_); - } else { - CHECK_LE(top.size(), channels_); - } + CHECK_LE(top.size(), bottom_slice_axis); int prev = 0; vector slices; for (int i = 0; i < slice_point_.size(); ++i) { @@ -42,94 +50,64 @@ void SliceLayer::Reshape(const vector*>& bottom, slices.push_back(slice_point_[i] - prev); prev = slice_point_[i]; } - if (slice_dim_ == 0) { - slices.push_back(num_ - prev); - for (int i = 
0; i < top.size(); ++i) { - top[i]->Reshape(slices[i], channels_, height_, width_); - count_ += top[i]->count(); - } - } else { - slices.push_back(channels_ - prev); - for (int i = 0; i < top.size(); ++i) { - top[i]->Reshape(num_, slices[i], height_, width_); - count_ += top[i]->count(); - } + slices.push_back(bottom_slice_axis - prev); + for (int i = 0; i < top.size(); ++i) { + top_shape[slice_axis_] = slices[i]; + top[i]->Reshape(top_shape); + count += top[i]->count(); } } else { - if (slice_dim_ == 0) { - CHECK_EQ(num_ % top.size(), 0) - << "Number of top blobs (" << top.size() << ") " - << "should evenly divide input num ( " << num_ << ")"; - num_ = num_ / top.size(); - } else { - CHECK_EQ(channels_ % top.size(), 0) - << "Number of top blobs (" << top.size() << ") " - << "should evenly divide input channels ( " << channels_ << ")"; - channels_ = channels_ / top.size(); - } + CHECK_EQ(bottom_slice_axis % top.size(), 0) + << "Number of top blobs (" << top.size() << ") should evenly " + << "divide input slice axis (" << bottom_slice_axis << ")"; + top_shape[slice_axis_] = bottom_slice_axis / top.size(); for (int i = 0; i < top.size(); ++i) { - top[i]->Reshape(num_, channels_, height_, width_); - count_ += top[i]->count(); + top[i]->Reshape(top_shape); + count += top[i]->count(); } } - CHECK_EQ(count_, bottom[0]->count()); + CHECK_EQ(count, bottom[0]->count()); } template void SliceLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->mutable_cpu_data(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_cpu_data(); - caffe_copy(blob->count(), bottom_data + bottom[0]->offset(offset_num), - top_data); - offset_num += blob->num(); + int offset_slice_axis = 0; + const Dtype* bottom_data = bottom[0]->cpu_data(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + Dtype* 
top_data = top[i]->mutable_cpu_data(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + bottom_data + bottom_offset, top_data + top_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_cpu_data(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data + bottom[0]->offset(n, offset_channel), - top_data + blob->offset(n)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. + offset_slice_axis += top_slice_axis; + } } template void SliceLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { if (!propagate_down[0]) { return; } + int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->cpu_diff(); - caffe_copy(blob->count(), top_diff, - bottom_diff + bottom[0]->offset(offset_num)); - offset_num += blob->num(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->cpu_diff(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + top_diff + top_offset, bottom_diff + bottom_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < 
top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->cpu_diff(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + blob->offset(n), - bottom_diff + bottom[0]->offset(n, offset_channel)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. + offset_slice_axis += top_slice_axis; + } } #ifdef CPU_ONLY diff --git a/src/caffe/layers/slice_layer.cu b/src/caffe/layers/slice_layer.cu index b5c5e61533f..e6e65677bd8 100644 --- a/src/caffe/layers/slice_layer.cu +++ b/src/caffe/layers/slice_layer.cu @@ -9,58 +9,42 @@ namespace caffe { template void SliceLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->mutable_gpu_data(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_gpu_data(); - caffe_copy(blob->count(), bottom_data + bottom[0]->offset(offset_num), - top_data); - offset_num += blob->num(); + int offset_slice_axis = 0; + const Dtype* bottom_data = bottom[0]->gpu_data(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + Dtype* top_data = top[i]->mutable_gpu_data(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + bottom_data + bottom_offset, top_data + top_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_gpu_data(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, 
bottom_data + bottom[0]->offset(n, offset_channel), - top_data + blob->offset(n)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. + offset_slice_axis += top_slice_axis; + } } template void SliceLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { if (!propagate_down[0]) { return; } + int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->gpu_diff(); - caffe_copy(blob->count(), top_diff, - bottom_diff + bottom[0]->offset(offset_num)); - offset_num += blob->num(); - } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->gpu_diff(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + blob->offset(n), - bottom_diff + bottom[0]->offset(n, offset_channel)); - } - offset_channel += blob->channels(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->gpu_diff(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + top_diff + top_offset, bottom_diff + bottom_offset); } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. 
+ offset_slice_axis += top_slice_axis; + } } INSTANTIATE_LAYER_GPU_FUNCS(SliceLayer); diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp index 25142fdec53..04712c9e653 100644 --- a/src/caffe/layers/softmax_layer.cpp +++ b/src/caffe/layers/softmax_layer.cpp @@ -10,14 +10,18 @@ namespace caffe { template void SoftmaxLayer::Reshape(const vector*>& bottom, const vector*>& top) { - top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - sum_multiplier_.Reshape(1, bottom[0]->channels(), 1, 1); + softmax_axis_ = + bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis()); + top[0]->ReshapeLike(*bottom[0]); + vector mult_dims(1, bottom[0]->shape(softmax_axis_)); + sum_multiplier_.Reshape(mult_dims); Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); - for (int i = 0; i < sum_multiplier_.count(); ++i) { - multiplier_data[i] = 1.; - } - scale_.Reshape(bottom[0]->num(), 1, bottom[0]->height(), bottom[0]->width()); + caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data); + outer_num_ = bottom[0]->count(0, softmax_axis_); + inner_num_ = bottom[0]->count(softmax_axis_ + 1); + vector scale_dims = bottom[0]->shape(); + scale_dims[softmax_axis_] = 1; + scale_.Reshape(scale_dims); } template @@ -26,34 +30,32 @@ void SoftmaxLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); Dtype* scale_data = scale_.mutable_cpu_data(); - int num = bottom[0]->num(); - int channels = bottom[0]->channels(); - int dim = bottom[0]->count() / bottom[0]->num(); - int spatial_dim = bottom[0]->height() * bottom[0]->width(); + int channels = bottom[0]->shape(softmax_axis_); + int dim = bottom[0]->count() / outer_num_; caffe_copy(bottom[0]->count(), bottom_data, top_data); // We need to subtract the max to avoid numerical issues, compute the exp, // and then normalize. 
- for (int i = 0; i < num; ++i) { + for (int i = 0; i < outer_num_; ++i) { // initialize scale_data to the first plane - caffe_copy(spatial_dim, bottom_data + i * dim, scale_data); + caffe_copy(inner_num_, bottom_data + i * dim, scale_data); for (int j = 0; j < channels; j++) { - for (int k = 0; k < spatial_dim; k++) { + for (int k = 0; k < inner_num_; k++) { scale_data[k] = std::max(scale_data[k], - bottom_data[i * dim + j * spatial_dim + k]); + bottom_data[i * dim + j * inner_num_ + k]); } } // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, spatial_dim, - 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data + i * dim); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, inner_num_, + 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data); // exponentiation - caffe_exp(dim, top_data + i * dim, top_data + i * dim); + caffe_exp(dim, top_data, top_data); // sum after exp - caffe_cpu_gemv(CblasTrans, channels, spatial_dim, 1., - top_data + i * dim, sum_multiplier_.cpu_data(), 0., scale_data); + caffe_cpu_gemv(CblasTrans, channels, inner_num_, 1., + top_data, sum_multiplier_.cpu_data(), 0., scale_data); // division for (int j = 0; j < channels; j++) { - caffe_div(spatial_dim, top_data + top[0]->offset(i, j), scale_data, - top_data + top[0]->offset(i, j)); + caffe_div(inner_num_, top_data, scale_data, top_data); + top_data += inner_num_; } } } @@ -66,20 +68,18 @@ void SoftmaxLayer::Backward_cpu(const vector*>& top, const Dtype* top_data = top[0]->cpu_data(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); Dtype* scale_data = scale_.mutable_cpu_data(); - int num = top[0]->num(); - int channels = top[0]->channels(); - int dim = top[0]->count() / top[0]->num(); - int spatial_dim = top[0]->height() * top[0]->width(); + int channels = top[0]->shape(softmax_axis_); + int dim = top[0]->count() / outer_num_; caffe_copy(top[0]->count(), top_diff, bottom_diff); - for (int i = 0; i < num; ++i) { + for (int i = 0; i < outer_num_; ++i) { // 
compute dot(top_diff, top_data) and subtract them from the bottom diff - for (int k = 0; k < spatial_dim; ++k) { + for (int k = 0; k < inner_num_; ++k) { scale_data[k] = caffe_cpu_strided_dot(channels, - bottom_diff + i * dim + k, spatial_dim, - top_data + i * dim + k, spatial_dim); + bottom_diff + i * dim + k, inner_num_, + top_data + i * dim + k, inner_num_); } // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, spatial_dim, 1, + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, inner_num_, 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., bottom_diff + i * dim); } // elementwise multiplication diff --git a/src/caffe/layers/softmax_layer.cu b/src/caffe/layers/softmax_layer.cu index 6b8871a0b20..1f9c3a41203 100644 --- a/src/caffe/layers/softmax_layer.cu +++ b/src/caffe/layers/softmax_layer.cu @@ -90,36 +90,33 @@ void SoftmaxLayer::Forward_gpu(const vector*>& bottom, Dtype* top_data = top[0]->mutable_gpu_data(); Dtype* scale_data = scale_.mutable_gpu_data(); int count = bottom[0]->count(); - int num = bottom[0]->num(); - int channels = bottom[0]->channels(); - int spatial_dim = bottom[0]->height() * bottom[0]->width(); + int channels = top[0]->shape(softmax_axis_); caffe_copy(count, bottom_data, top_data); // We need to subtract the max to avoid numerical issues, compute the exp, // and then normalize. 
// compute max // NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_max<<>>(num, channels, spatial_dim, top_data, + kernel_channel_max<<>>(outer_num_, channels, inner_num_, top_data, scale_data); // subtract // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_subtract<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, top_data); // exponentiate // NOLINT_NEXT_LINE(whitespace/operators) - kernel_exp<<>>(num * channels * spatial_dim, top_data, - top_data); + kernel_exp<<>>( + count, top_data, top_data); // sum after exp // NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_sum<<>>(num, channels, spatial_dim, top_data, + kernel_channel_sum<<>>(outer_num_, channels, inner_num_, top_data, scale_data); // divide // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_div<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, top_data); } @@ -131,18 +128,16 @@ void SoftmaxLayer::Backward_gpu(const vector*>& top, Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); Dtype* scale_data = scale_.mutable_gpu_data(); int count = top[0]->count(); - int num = top[0]->num(); - int channels = top[0]->channels(); - int spatial_dim = top[0]->height() * top[0]->width(); - caffe_copy(top[0]->count(), top_diff, bottom_diff); + int channels = top[0]->shape(softmax_axis_); + caffe_copy(count, top_diff, bottom_diff); // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff. 
// NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_dot<<>>(num, channels, spatial_dim, top_diff, top_data, - scale_data); + kernel_channel_dot<<>>(outer_num_, channels, inner_num_, + top_diff, top_data, scale_data); // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_subtract<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, bottom_diff); // elementwise multiplication caffe_gpu_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp index 0c9ba2c6626..132c30796a4 100644 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -35,6 +35,14 @@ void SoftmaxWithLossLayer::Reshape( const vector*>& bottom, const vector*>& top) { LossLayer::Reshape(bottom, top); softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_); + softmax_axis_ = this->layer_param_.softmax_param().axis(); + outer_num_ = bottom[0]->count(0, softmax_axis_); + inner_num_ = bottom[0]->count(softmax_axis_ + 1); + CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count()) + << "Number of labels must match number of predictions; " + << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), " + << "label count (number of labels) must be N*H*W, " + << "with integer values in {0, 1, ..., C-1}."; if (top.size() >= 2) { // softmax output top[1]->ReshapeLike(*bottom[0]); @@ -48,20 +56,18 @@ void SoftmaxWithLossLayer::Forward_cpu( softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); const Dtype* prob_data = prob_.cpu_data(); const Dtype* label = bottom[1]->cpu_data(); - int num = prob_.num(); - int dim = prob_.count() / num; - int spatial_dim = prob_.height() * prob_.width(); + int dim = prob_.count() / outer_num_; int count = 0; Dtype loss = 0; - for (int i = 0; i < num; ++i) { - for (int j = 0; j < spatial_dim; j++) { - const int label_value = static_cast(label[i * 
spatial_dim + j]); + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; j++) { + const int label_value = static_cast(label[i * inner_num_ + j]); if (has_ignore_label_ && label_value == ignore_label_) { continue; } DCHECK_GE(label_value, 0); - DCHECK_LT(label_value, prob_.channels()); - loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j], + DCHECK_LT(label_value, prob_.shape(softmax_axis_)); + loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j], Dtype(FLT_MIN))); ++count; } @@ -69,7 +75,7 @@ void SoftmaxWithLossLayer::Forward_cpu( if (normalize_) { top[0]->mutable_cpu_data()[0] = loss / count; } else { - top[0]->mutable_cpu_data()[0] = loss / num; + top[0]->mutable_cpu_data()[0] = loss / outer_num_; } if (top.size() == 2) { top[1]->ShareData(prob_); @@ -88,19 +94,17 @@ void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, const Dtype* prob_data = prob_.cpu_data(); caffe_copy(prob_.count(), prob_data, bottom_diff); const Dtype* label = bottom[1]->cpu_data(); - int num = prob_.num(); - int dim = prob_.count() / num; - int spatial_dim = prob_.height() * prob_.width(); + int dim = prob_.count() / outer_num_; int count = 0; - for (int i = 0; i < num; ++i) { - for (int j = 0; j < spatial_dim; ++j) { - const int label_value = static_cast(label[i * spatial_dim + j]); + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; ++j) { + const int label_value = static_cast(label[i * inner_num_ + j]); if (has_ignore_label_ && label_value == ignore_label_) { - for (int c = 0; c < bottom[0]->channels(); ++c) { - bottom_diff[i * dim + c * spatial_dim + j] = 0; + for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) { + bottom_diff[i * dim + c * inner_num_ + j] = 0; } } else { - bottom_diff[i * dim + label_value * spatial_dim + j] -= 1; + bottom_diff[i * dim + label_value * inner_num_ + j] -= 1; ++count; } } @@ -110,7 +114,7 @@ void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, if 
(normalize_) { caffe_scal(prob_.count(), loss_weight / count, bottom_diff); } else { - caffe_scal(prob_.count(), loss_weight / num, bottom_diff); + caffe_scal(prob_.count(), loss_weight / outer_num_, bottom_diff); } } } diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu index 215d589ffee..7e0f3da4552 100644 --- a/src/caffe/layers/softmax_loss_layer.cu +++ b/src/caffe/layers/softmax_loss_layer.cu @@ -35,10 +35,8 @@ void SoftmaxWithLossLayer::Forward_gpu( softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); const Dtype* prob_data = prob_.gpu_data(); const Dtype* label = bottom[1]->gpu_data(); - const int num = prob_.num(); - const int dim = prob_.count() / num; - const int spatial_dim = prob_.height() * prob_.width(); - const int nthreads = num * spatial_dim; + const int dim = prob_.count() / outer_num_; + const int nthreads = outer_num_ * inner_num_; // Since this memory is not used for anything until it is overwritten // on the backward pass, we use it here to avoid having to allocate new GPU // memory to accumulate intermediate results in the kernel. 
@@ -49,7 +47,7 @@ void SoftmaxWithLossLayer::Forward_gpu( // NOLINT_NEXT_LINE(whitespace/operators) SoftmaxLossForwardGPU<<>>(nthreads, prob_data, label, loss_data, - num, dim, spatial_dim, has_ignore_label_, ignore_label_, counts); + outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); Dtype loss; caffe_gpu_asum(nthreads, loss_data, &loss); if (normalize_) { @@ -57,7 +55,7 @@ void SoftmaxWithLossLayer::Forward_gpu( caffe_gpu_asum(nthreads, counts, &count); loss /= count; } else { - loss /= num; + loss /= outer_num_; } top[0]->mutable_cpu_data()[0] = loss; if (top.size() == 2) { @@ -102,24 +100,22 @@ void SoftmaxWithLossLayer::Backward_gpu(const vector*>& top, const Dtype* top_data = top[0]->gpu_data(); caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff); const Dtype* label = bottom[1]->gpu_data(); - const int num = prob_.num(); - const int dim = prob_.count() / num; - const int spatial_dim = prob_.height() * prob_.width(); - const int nthreads = num * spatial_dim; + const int dim = prob_.count() / outer_num_; + const int nthreads = outer_num_ * inner_num_; // Since this memory is never used for anything else, // we use it to avoid allocating new GPU memory.
Dtype* counts = prob_.mutable_gpu_diff(); // NOLINT_NEXT_LINE(whitespace/operators) SoftmaxLossBackwardGPU<<>>(nthreads, top_data, label, bottom_diff, - num, dim, spatial_dim, has_ignore_label_, ignore_label_, counts); + outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); const Dtype loss_weight = top[0]->cpu_diff()[0]; if (normalize_) { Dtype count; caffe_gpu_asum(nthreads, counts, &count); caffe_gpu_scal(prob_.count(), loss_weight / count, bottom_diff); } else { - caffe_gpu_scal(prob_.count(), loss_weight / num, bottom_diff); + caffe_gpu_scal(prob_.count(), loss_weight / outer_num_, bottom_diff); } } } diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp index d6929b99683..272cb59cd37 100644 --- a/src/caffe/layers/split_layer.cpp +++ b/src/caffe/layers/split_layer.cpp @@ -18,8 +18,7 @@ void SplitLayer::Reshape(const vector*>& bottom, // some strange effects in practice...) CHECK_NE(top[i], bottom[0]) << this->type() << " Layer does not " "allow in-place computation."; - top[i]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); + top[i]->ReshapeLike(*bottom[0]); CHECK_EQ(count_, top[i]->count()); } } diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 36e41560327..c127d56bc46 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -177,8 +177,9 @@ void WindowDataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label - top[1]->Reshape(batch_size, 1, 1, 1); - this->prefetch_label_.Reshape(batch_size, 1, 1, 1); + vector label_shape(1, batch_size); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); // data mean has_mean_file_ = this->transform_param_.has_mean_file(); diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index c359be9b575..bd6c22e5e5c 100644 --- a/src/caffe/net.cpp +++ 
b/src/caffe/net.cpp @@ -48,8 +48,16 @@ void Net::Init(const NetParameter& in_param) { name_ = param.name(); map blob_name_to_idx; set available_blobs; - CHECK_EQ(param.input_size() * 4, param.input_dim_size()) - << "Incorrect input blob dimension specifications."; + CHECK(param.input_dim_size() == 0 || param.input_shape_size() == 0) + << "Must specify either input_shape OR deprecated input_dim, not both."; + if (param.input_dim_size() > 0) { + // Deprecated 4D dimensions. + CHECK_EQ(param.input_size() * 4, param.input_dim_size()) + << "Incorrect input blob dimension specifications."; + } else { + CHECK_EQ(param.input_size(), param.input_shape_size()) + << "Exactly one input_shape must be specified per input."; + } memory_used_ = 0; // set the input blobs for (int input_id = 0; input_id < param.input_size(); ++input_id) { @@ -109,11 +117,7 @@ void Net::Init(const NetParameter& in_param) { blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0)); } blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id); - LOG(INFO) << "Top shape: " << top_vecs_[layer_id][top_id]->num() << " " - << top_vecs_[layer_id][top_id]->channels() << " " - << top_vecs_[layer_id][top_id]->height() << " " - << top_vecs_[layer_id][top_id]->width() << " (" - << top_vecs_[layer_id][top_id]->count() << ")"; + LOG(INFO) << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string(); if (layer->loss(top_id)) { LOG(INFO) << " with loss weight " << layer->loss(top_id); } @@ -209,6 +213,7 @@ void Net::Init(const NetParameter& in_param) { layer_names_index_[layer_names_[layer_id]] = layer_id; } GetLearningRateAndWeightDecay(); + ShareWeightData(); debug_info_ = param.debug_info(); LOG(INFO) << "Network initialization done."; LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype); @@ -343,10 +348,14 @@ void Net::AppendTop(const NetParameter& param, const int layer_id, if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; } if (layer_id == -1) 
{ // Set the (explicitly specified) dimensions of the input blob. - blob_pointer->Reshape(param.input_dim(top_id * 4), - param.input_dim(top_id * 4 + 1), - param.input_dim(top_id * 4 + 2), - param.input_dim(top_id * 4 + 3)); + if (param.input_dim_size() > 0) { + blob_pointer->Reshape(param.input_dim(top_id * 4), + param.input_dim(top_id * 4 + 1), + param.input_dim(top_id * 4 + 2), + param.input_dim(top_id * 4 + 3)); + } else { + blob_pointer->Reshape(param.input_shape(top_id)); + } net_input_blob_indices_.push_back(blob_id); net_input_blobs_.push_back(blob_pointer.get()); } else { @@ -427,17 +436,8 @@ void Net::AppendParam(const NetParameter& param, const int layer_id, << "Shared parameter blobs must have the same count."; } else { // Strict dimension checking -- all dims must be the same. - CHECK_EQ(this_blob->num(), owner_blob->num()) - << "Shared parameter blobs must have the same num."; - CHECK_EQ(this_blob->channels(), owner_blob->channels()) - << "Shared parameter blobs must have the same channels."; - CHECK_EQ(this_blob->height(), owner_blob->height()) - << "Shared parameter blobs must have the same height."; - CHECK_EQ(this_blob->width(), owner_blob->width()) - << "Shared parameter blobs must have the same width."; + CHECK(this_blob->shape() == owner_blob->shape()); } - layers_[layer_id]->blobs()[param_id]->ShareData( - *layers_[owner_layer_id]->blobs()[owner_param_id]); } } @@ -640,10 +640,7 @@ void Net::ShareTrainedLayersWith(const Net* other) { << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { Blob* source_blob = source_layer->blobs()[j].get(); - CHECK_EQ(target_blobs[j]->num(), source_blob->num()); - CHECK_EQ(target_blobs[j]->channels(), source_blob->channels()); - CHECK_EQ(target_blobs[j]->height(), source_blob->height()); - CHECK_EQ(target_blobs[j]->width(), source_blob->width()); + CHECK(target_blobs[j]->shape() == source_blob->shape()); target_blobs[j]->ShareData(*source_blob); } } @@ 
-707,11 +704,8 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param) { CHECK_EQ(target_blobs.size(), source_layer.blobs_size()) << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { - CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num()); - CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels()); - CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height()); - CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width()); - target_blobs[j]->FromProto(source_layer.blobs(j)); + const bool kReshape = false; + target_blobs[j]->FromProto(source_layer.blobs(j), kReshape); } } } @@ -746,35 +740,7 @@ void Net::ToProto(NetParameter* param, bool write_diff) const { template void Net::Update() { - // First, accumulate the diffs of any shared parameters into their owner's - // diff. (Assumes that the learning rate, weight decay, etc. have already been - // accounted for in the current diff.) - for (int i = 0; i < params_.size(); ++i) { - if (param_owners_[i] < 0) { continue; } - if (debug_info_) { UpdateDebugInfo(i); } - const int count = params_[i]->count(); - const Dtype* this_diff; - Dtype* owner_diff; - switch (Caffe::mode()) { - case Caffe::CPU: - this_diff = params_[i]->cpu_diff(); - owner_diff = params_[param_owners_[i]]->mutable_cpu_diff(); - caffe_add(count, this_diff, owner_diff, owner_diff); - break; -#ifndef CPU_ONLY - case Caffe::GPU: - this_diff = params_[i]->gpu_diff(); - owner_diff = params_[param_owners_[i]]->mutable_gpu_diff(); - caffe_gpu_add(count, this_diff, owner_diff, owner_diff); - break; -#else - NO_GPU; -#endif - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } - } - // Now, update the owned parameters. + // Update only the owned parameters. 
for (int i = 0; i < params_.size(); ++i) { if (param_owners_[i] >= 0) { continue; } if (debug_info_) { UpdateDebugInfo(i); } @@ -782,6 +748,15 @@ void Net::Update() { } } +template +void Net::ShareWeightData() { + for (int i = 0; i < params_.size(); ++i) { + if (param_owners_[i] < 0) { continue; } + params_[i]->ShareData(*params_[param_owners_[i]]); + params_[i]->ShareDiff(*params_[param_owners_[i]]); + } +} + template bool Net::has_blob(const string& blob_name) const { return blob_names_index_.find(blob_name) != blob_names_index_.end(); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 84b475ce3cd..227271d6918 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -2,13 +2,21 @@ syntax = "proto2"; package caffe; +// Specifies the shape (dimensions) of a Blob. +message BlobShape { + repeated int64 dim = 1 [packed = true]; +} + message BlobProto { + optional BlobShape shape = 7; + repeated float data = 5 [packed = true]; + repeated float diff = 6 [packed = true]; + + // 4D dimensions -- deprecated. Use "shape" instead. optional int32 num = 1 [default = 0]; optional int32 channels = 2 [default = 0]; optional int32 height = 3 [default = 0]; optional int32 width = 4 [default = 0]; - repeated float data = 5 [packed = true]; - repeated float diff = 6 [packed = true]; } // The BlobProtoVector is simply a way to pass multiple blobproto instances @@ -47,10 +55,15 @@ message NetParameter { optional string name = 1; // consider giving the network a name // The input blobs to the network. repeated string input = 3; - // The dim of the input blobs. For each input blob there should be four + // The shape of the input blobs. + repeated BlobShape input_shape = 8; + + // 4D input dimensions -- deprecated. Use "shape" instead. + // If specified, for each input blob there should be four // values specifying the num, channels, height and width of the input blob. // Thus, there should be a total of (4 * #input) numbers. 
repeated int32 input_dim = 4; + // Whether the network will force every layer to carry out backward operation. // If set False, then whether to carry out backward is determined // automatically according to the net structure and learning rates. @@ -75,7 +88,7 @@ message NetParameter { // NOTE // Update the next available ID when you add a new SolverParameter field. // -// SolverParameter next available ID: 36 (last added: clip_gradients) +// SolverParameter next available ID: 37 (last added: iter_size) message SolverParameter { ////////////////////////////////////////////////////////////////////////////// // Specifying the train and test networks @@ -128,6 +141,7 @@ message SolverParameter { // Display the loss averaged over the last average_loss iterations optional int32 average_loss = 33 [default = 1]; optional int32 max_iter = 7; // the maximum number of iterations + optional int32 iter_size = 36 [default = 1]; optional string lr_policy = 8; // The learning rate decay policy. optional float gamma = 9; // The parameter to compute the learning rate. optional float power = 10; // The parameter to compute the learning rate. @@ -246,7 +260,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. 
// -// LayerParameter next available layer-specific ID: 131 (last added: python_param) +// LayerParameter next available layer-specific ID: 134 (last added: recurrent_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -298,6 +312,7 @@ message LayerParameter { optional DropoutParameter dropout_param = 108; optional DummyDataParameter dummy_data_param = 109; optional EltwiseParameter eltwise_param = 110; + optional EmbedParameter embed_param = 131; optional ExpParameter exp_param = 111; optional HDF5DataParameter hdf5_data_param = 112; optional HDF5OutputParameter hdf5_output_param = 113; @@ -311,7 +326,9 @@ message LayerParameter { optional PoolingParameter pooling_param = 121; optional PowerParameter power_param = 122; optional PythonParameter python_param = 130; + optional RecurrentParameter recurrent_param = 133; optional ReLUParameter relu_param = 123; + optional ReshapeParameter reshape_param = 132; optional SigmoidParameter sigmoid_param = 124; optional SoftmaxParameter softmax_param = 125; optional SliceParameter slice_param = 126; @@ -354,6 +371,12 @@ message AccuracyParameter { // the top k scoring classes. By default, only compare to the top scoring // class (i.e. argmax). optional uint32 top_k = 1 [default = 1]; + + // Controls the denominator in the computed accuracy = #correct / denominator. + // Must be a positive number, or the default of 0, for the total input weight. + // If no input weights are used, the denominator is the batch size, as the + // weights each default to 1. 
+ optional float denominator = 2 [default = 0]; } // Message that stores parameters used by ArgMaxLayer @@ -365,9 +388,13 @@ message ArgMaxParameter { // Message that stores parameters used by ConcatLayer message ConcatParameter { - // Concat Layer needs to specify the dimension along the concat will happen, - // the other dimensions must be the same for all the bottom blobs - // By default it will concatenate blobs along channels dimension + // The axis along which to concatenate -- may be negative to index from the + // end (e.g., -1 for the last axis). Other axes must have the + // same dimension for all the bottom blobs. + // By default, ConcatLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 2 [default = 1]; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. optional uint32 concat_dim = 1 [default = 1]; } @@ -444,13 +471,15 @@ message DropoutParameter { // (or constant) data generated by "Fillers" (see "message FillerParameter"). message DummyDataParameter { // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N - // num, N channels, N height, and N width fields, and must specify 0, 1 or N - // data_fillers. + // shape fields, and 0, 1 or N data_fillers. // // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. // If 1 data_filler is specified, it is applied to all top blobs. If N are // specified, the ith is applied to the ith top blob. repeated FillerParameter data_filler = 1; + repeated BlobShape shape = 6; + + // 4D dimensions -- deprecated. Use "shape" instead. repeated uint32 num = 2; repeated uint32 channels = 3; repeated uint32 height = 4; @@ -470,6 +499,24 @@ message EltwiseParameter { // Whether to use an asymptotically slower (for >2 inputs) but stabler method // of computing the gradient for the PROD operation. (No effect for SUM op.) 
optional bool stable_prod_grad = 3 [default = true]; + + // If true and the EltwiseOp is SUM, the last bottom blob is a singleton + // coefficient for the first N-1 bottom blobs, with shape (N-1, 1, 1, 1). + optional bool coeff_blob = 4 [default = false]; +} + +// Message that stores parameters used by EmbedLayer +message EmbedParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + // The input is given as integers to be interpreted as one-hot + // vector indices with dimension num_input. Hence num_input should be + // 1 greater than the maximum possible input value. + optional uint32 input_dim = 2; + + optional bool bias_term = 3 [default = true]; // Whether to use a bias term + optional FillerParameter weight_filler = 4; // The filler for the weight + optional FillerParameter bias_filler = 5; // The filler for the bias + } // Message that stores parameters used by ExpLayer @@ -548,6 +595,11 @@ message InnerProductParameter { optional bool bias_term = 2 [default = true]; // whether to have bias terms optional FillerParameter weight_filler = 3; // The filler for the weight optional FillerParameter bias_filler = 4; // The filler for the bias + + // The first axis to be lumped into a single inner product computation; + // all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 5 [default = 1]; } // Message that stores parameters used by LRNLayer @@ -624,6 +676,26 @@ message PythonParameter { optional string layer = 2; } +// Message that stores parameters used by ReshapeLayer +message ReshapeParameter { + // The new shape of the Blob. Must have the same "count" (product of + // dimensions) as the input Blob. + optional BlobShape shape = 1; +} + +// Message that stores parameters used by RecurrentLayer +message RecurrentParameter { + // The dimension of the output (and usually hidden state) representation -- + // must be explicitly set to non-zero. 
+ optional uint32 num_output = 1 [default = 0]; + + optional FillerParameter weight_filler = 2; // The filler for the weight + optional FillerParameter bias_filler = 3; // The filler for the bias + + // Whether to enable displaying debug_info in the unrolled recurrent net. + optional bool debug_info = 4 [default = false]; +} + // Message that stores parameters used by ReLULayer message ReLUParameter { // Allow non-zero slope for negative inputs to speed up optimization @@ -652,12 +724,14 @@ message SigmoidParameter { // Message that stores parameters used by SliceLayer message SliceParameter { - // SliceLayer needs to know which dimension to slice across. - // Currently, SliceLayer only supports slicing across num (dim 0) - // and channels (dim 1). - // By default, SliceLayer slices across channels. - optional uint32 slice_dim = 1 [default = 1]; + // The axis along which to slice -- may be negative to index from the end + // (e.g., -1 for the last axis). + // By default, SliceLayer slices blobs along the "channels" axis (1). + optional int32 axis = 3 [default = 1]; repeated uint32 slice_point = 2; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 slice_dim = 1 [default = 1]; } // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer @@ -668,6 +742,11 @@ message SoftmaxParameter { CUDNN = 2; } optional Engine engine = 1 [default = DEFAULT]; + + // The axis along which to perform the softmax -- may be negative to index + // from the end (e.g., -1 for the last axis). + // Any other axes will be evaluated as independent softmaxes.
+ optional int32 axis = 2 [default = 1]; } // Message that stores parameters used by TanHLayer diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 8ed8aec2fc8..affbf149ac4 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -168,6 +168,25 @@ void Solver::Step(int iters) { Dtype smoothed_loss = 0; for (; iter_ < stop_iter; ++iter_) { + // zero-init the params + for (int i = 0; i < net_->params().size(); ++i) { + shared_ptr > blob = net_->params()[i]; + switch (Caffe::mode()) { + case Caffe::CPU: + caffe_set(blob->count(), static_cast(0), + blob->mutable_cpu_diff()); + break; + case Caffe::GPU: +#ifndef CPU_ONLY + caffe_gpu_set(blob->count(), static_cast(0), + blob->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + } + } + if (param_.test_interval() && iter_ % param_.test_interval() == 0 && (iter_ > 0 || param_.test_initialization())) { TestAll(); @@ -175,7 +194,13 @@ void Solver::Step(int iters) { const bool display = param_.display() && iter_ % param_.display() == 0; net_->set_debug_info(display && param_.debug_info()); - Dtype loss = net_->ForwardBackward(bottom_vec); + // accumulate the loss and gradient + Dtype loss = 0; + for (int i = 0; i < param_.iter_size(); ++i) { + loss += net_->ForwardBackward(bottom_vec); + } + loss /= param_.iter_size(); + // average the loss across iterations for smoothed reporting if (losses.size() < average_loss) { losses.push_back(loss); int size = losses.size(); @@ -420,16 +445,10 @@ void SGDSolver::PreSolve() { update_.clear(); temp_.clear(); for (int i = 0; i < net_params.size(); ++i) { - const Blob* net_param = net_params[i].get(); - history_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - update_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - temp_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - 
net_param->width()))); + const vector& shape = net_params[i]->shape(); + history_.push_back(shared_ptr >(new Blob(shape))); + update_.push_back(shared_ptr >(new Blob(shape))); + temp_.push_back(shared_ptr >(new Blob(shape))); } } @@ -477,7 +496,8 @@ void SGDSolver::ComputeUpdateValue() { case Caffe::CPU: for (int param_id = 0; param_id < net_params.size(); ++param_id) { // Compute the value to history, and then copy them to the blob's diff. - Dtype local_rate = rate * net_params_lr[param_id]; + Dtype local_rate = rate * net_params_lr[param_id] + / this->param_.iter_size(); Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; if (local_decay) { @@ -513,7 +533,8 @@ void SGDSolver::ComputeUpdateValue() { #ifndef CPU_ONLY for (int param_id = 0; param_id < net_params.size(); ++param_id) { // Compute the value to history, and then copy them to the blob's diff. - Dtype local_rate = rate * net_params_lr[param_id]; + Dtype local_rate = rate * net_params_lr[param_id] + / this->param_.iter_size(); Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; if (local_decay) { diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp index fa59fab1e8a..1c58b767bfc 100644 --- a/src/caffe/test/test_accuracy_layer.cpp +++ b/src/caffe/test/test_accuracy_layer.cpp @@ -19,10 +19,16 @@ template class AccuracyLayerTest : public ::testing::Test { protected: AccuracyLayerTest() - : blob_bottom_data_(new Blob(100, 10, 1, 1)), - blob_bottom_label_(new Blob(100, 1, 1, 1)), + : blob_bottom_data_(new Blob()), + blob_bottom_label_(new Blob()), blob_top_(new Blob()), top_k_(3) { + vector shape(2); + shape[0] = 100; + shape[1] = 10; + blob_bottom_data_->Reshape(shape); + shape.resize(1); + blob_bottom_label_->Reshape(shape); // fill the probability values FillerParameter filler_param; GaussianFiller filler(filler_param); diff --git a/src/caffe/test/test_blob.cpp b/src/caffe/test/test_blob.cpp index e0678061173..7da6423b67c 100644 --- 
a/src/caffe/test/test_blob.cpp +++ b/src/caffe/test/test_blob.cpp @@ -1,4 +1,5 @@ #include +#include #include "gtest/gtest.h" @@ -31,10 +32,7 @@ TYPED_TEST(BlobSimpleTest, TestInitialization) { EXPECT_EQ(this->blob_preshaped_->height(), 4); EXPECT_EQ(this->blob_preshaped_->width(), 5); EXPECT_EQ(this->blob_preshaped_->count(), 120); - EXPECT_EQ(this->blob_->num(), 0); - EXPECT_EQ(this->blob_->channels(), 0); - EXPECT_EQ(this->blob_->height(), 0); - EXPECT_EQ(this->blob_->width(), 0); + EXPECT_EQ(this->blob_->num_axes(), 0); EXPECT_EQ(this->blob_->count(), 0); } @@ -54,6 +52,59 @@ TYPED_TEST(BlobSimpleTest, TestReshape) { EXPECT_EQ(this->blob_->count(), 120); } +TYPED_TEST(BlobSimpleTest, TestLegacyBlobProtoShapeEquals) { + BlobProto blob_proto; + + // Reshape to (3 x 2). + vector shape(2); + shape[0] = 3; + shape[1] = 2; + this->blob_->Reshape(shape); + + // (3 x 2) blob == (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_TRUE(this->blob_->ShapeEquals(blob_proto)); + + // (3 x 2) blob != (0 x 1 x 3 x 2) legacy blob + blob_proto.set_num(0); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); + + // (3 x 2) blob != (3 x 1 x 3 x 2) legacy blob + blob_proto.set_num(3); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); + + // Reshape to (1 x 3 x 2). + shape.insert(shape.begin(), 1); + this->blob_->Reshape(shape); + + // (1 x 3 x 2) blob == (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_TRUE(this->blob_->ShapeEquals(blob_proto)); + + // Reshape to (2 x 3 x 2). 
+ shape[0] = 2; + this->blob_->Reshape(shape); + + // (2 x 3 x 2) blob != (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); +} + template class BlobMathTest : public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index f14f1d2fa4f..662a50fa23b 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -19,9 +19,9 @@ class ConcatLayerTest : public MultiDeviceTest { protected: ConcatLayerTest() - : blob_bottom_0(new Blob(2, 3, 6, 5)), - blob_bottom_1(new Blob(2, 5, 6, 5)), - blob_bottom_2(new Blob(5, 3, 6, 5)), + : blob_bottom_0_(new Blob(2, 3, 6, 5)), + blob_bottom_1_(new Blob(2, 5, 6, 5)), + blob_bottom_2_(new Blob(5, 3, 6, 5)), blob_top_(new Blob()) {} virtual void SetUp() { // fill the values @@ -29,30 +29,30 @@ class ConcatLayerTest : public MultiDeviceTest { FillerParameter filler_param; filler_param.set_value(1.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_0); + filler->Fill(this->blob_bottom_0_); filler_param.set_value(2.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_1); + filler->Fill(this->blob_bottom_1_); filler_param.set_value(3.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_2); - blob_bottom_vec_0.push_back(blob_bottom_0); - blob_bottom_vec_0.push_back(blob_bottom_1); - blob_bottom_vec_1.push_back(blob_bottom_0); - blob_bottom_vec_1.push_back(blob_bottom_2); + filler->Fill(this->blob_bottom_2_); + blob_bottom_vec_0_.push_back(blob_bottom_0_); + blob_bottom_vec_0_.push_back(blob_bottom_1_); + blob_bottom_vec_1_.push_back(blob_bottom_0_); + blob_bottom_vec_1_.push_back(blob_bottom_2_); blob_top_vec_.push_back(blob_top_); } virtual ~ConcatLayerTest() { - delete 
blob_bottom_0; delete blob_bottom_1; - delete blob_bottom_2; delete blob_top_; + delete blob_bottom_0_; delete blob_bottom_1_; + delete blob_bottom_2_; delete blob_top_; } - Blob* const blob_bottom_0; - Blob* const blob_bottom_1; - Blob* const blob_bottom_2; + Blob* const blob_bottom_0_; + Blob* const blob_bottom_1_; + Blob* const blob_bottom_2_; Blob* const blob_top_; - vector*> blob_bottom_vec_0, blob_bottom_vec_1; + vector*> blob_bottom_vec_0_, blob_bottom_vec_1_; vector*> blob_top_vec_; }; @@ -61,61 +61,115 @@ TYPED_TEST_CASE(ConcatLayerTest, TestDtypesAndDevices); TYPED_TEST(ConcatLayerTest, TestSetupNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_concat_param()->set_concat_dim(0); + layer_param.mutable_concat_param()->set_axis(0); ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_1, this->blob_top_vec_); + layer.SetUp(this->blob_bottom_vec_1_, this->blob_top_vec_); EXPECT_EQ(this->blob_top_->num(), - this->blob_bottom_0->num() + this->blob_bottom_2->num()); - EXPECT_EQ(this->blob_top_->channels(), this->blob_bottom_0->channels()); - EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0->height()); - EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0->width()); + this->blob_bottom_0_->num() + this->blob_bottom_2_->num()); + EXPECT_EQ(this->blob_top_->channels(), this->blob_bottom_0_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } TYPED_TEST(ConcatLayerTest, TestSetupChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_0, this->blob_top_vec_); - EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0->num()); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0_->num()); EXPECT_EQ(this->blob_top_->channels(), - 
this->blob_bottom_0->channels()+this->blob_bottom_1->channels()); - EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0->height()); - EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0->width()); + this->blob_bottom_0_->channels() + this->blob_bottom_1_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } +TYPED_TEST(ConcatLayerTest, TestSetupChannelsNegativeIndexing) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConcatLayer layer(layer_param); + // "channels" index is the third one from the end -- test negative indexing + // by setting axis to -3 and checking that we get the same results as above in + // TestSetupChannels. + layer_param.mutable_concat_param()->set_axis(-3); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0_->num()); + EXPECT_EQ(this->blob_top_->channels(), + this->blob_bottom_0_->channels() + this->blob_bottom_1_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); +} + +TYPED_TEST(ConcatLayerTest, TestForwardNum) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_concat_param()->set_axis(0); + ConcatLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_1_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_1_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_vec_1_[0]->num(); ++n) { + for (int c = 0; c < this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + EXPECT_EQ(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_vec_1_[0]->data_at(n, c, h, w)); + } + } + } + } + for (int n = 0; n < this->blob_bottom_vec_1_[1]->num(); ++n) { + for (int c = 0; c < 
this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + EXPECT_EQ(this->blob_top_->data_at(n + 2, c, h, w), + this->blob_bottom_vec_1_[1]->data_at(n, c, h, w)); + } + } + } + } +} -TYPED_TEST(ConcatLayerTest, TestNum) { +TYPED_TEST(ConcatLayerTest, TestForwardChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_0, this->blob_top_vec_); - layer.Forward(this->blob_bottom_vec_0, this->blob_top_vec_); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_0_, this->blob_top_vec_); for (int n = 0; n < this->blob_top_->num(); ++n) { - for (int c = 0; c < this->blob_bottom_0->channels(); ++c) { + for (int c = 0; c < this->blob_bottom_0_->channels(); ++c) { for (int h = 0; h < this->blob_top_->height(); ++h) { for (int w = 0; w < this->blob_top_->width(); ++w) { EXPECT_EQ(this->blob_top_->data_at(n, c, h, w), - this->blob_bottom_vec_0[0]->data_at(n, c, h, w)); + this->blob_bottom_vec_0_[0]->data_at(n, c, h, w)); } } } - for (int c = 0; c < this->blob_bottom_1->channels(); ++c) { + for (int c = 0; c < this->blob_bottom_1_->channels(); ++c) { for (int h = 0; h < this->blob_top_->height(); ++h) { for (int w = 0; w < this->blob_top_->width(); ++w) { - EXPECT_EQ(this->blob_top_->data_at(n, c+3, h, w), - this->blob_bottom_vec_0[1]->data_at(n, c, h, w)); + EXPECT_EQ(this->blob_top_->data_at(n, c + 3, h, w), + this->blob_bottom_vec_0_[1]->data_at(n, c, h, w)); } } } } } -TYPED_TEST(ConcatLayerTest, TestGradient) { +TYPED_TEST(ConcatLayerTest, TestGradientNum) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_concat_param()->set_axis(0); + ConcatLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradient(&layer, this->blob_bottom_vec_1_, + this->blob_top_vec_); +} + 
+TYPED_TEST(ConcatLayerTest, TestGradientChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); - checker.CheckGradient(&layer, this->blob_bottom_vec_0, + checker.CheckGradient(&layer, this->blob_bottom_vec_0_, this->blob_top_vec_); } diff --git a/src/caffe/test/test_eltwise_layer.cpp b/src/caffe/test/test_eltwise_layer.cpp index be0c1347709..85c11341abd 100644 --- a/src/caffe/test/test_eltwise_layer.cpp +++ b/src/caffe/test/test_eltwise_layer.cpp @@ -22,14 +22,18 @@ class EltwiseLayerTest : public MultiDeviceTest { : blob_bottom_a_(new Blob(2, 3, 4, 5)), blob_bottom_b_(new Blob(2, 3, 4, 5)), blob_bottom_c_(new Blob(2, 3, 4, 5)), + blob_bottom_coeff_(new Blob()), blob_top_(new Blob()) { - // fill the values + vector coeff_shape(2); + coeff_shape[0] = 3; coeff_shape[1] = 2; + blob_bottom_coeff_->Reshape(coeff_shape); Caffe::set_random_seed(1701); FillerParameter filler_param; UniformFiller filler(filler_param); filler.Fill(this->blob_bottom_a_); filler.Fill(this->blob_bottom_b_); filler.Fill(this->blob_bottom_c_); + filler.Fill(this->blob_bottom_coeff_); blob_bottom_vec_.push_back(blob_bottom_a_); blob_bottom_vec_.push_back(blob_bottom_b_); blob_bottom_vec_.push_back(blob_bottom_c_); @@ -39,11 +43,13 @@ class EltwiseLayerTest : public MultiDeviceTest { delete blob_bottom_a_; delete blob_bottom_b_; delete blob_bottom_c_; + delete blob_bottom_coeff_; delete blob_top_; } Blob* const blob_bottom_a_; Blob* const blob_bottom_b_; Blob* const blob_bottom_c_; + Blob* const blob_bottom_coeff_; Blob* const blob_top_; vector*> blob_bottom_vec_; vector*> blob_top_vec_; @@ -126,6 +132,37 @@ TYPED_TEST(EltwiseLayerTest, TestSumCoeff) { } } +TYPED_TEST(EltwiseLayerTest, TestSumBlobCoeff) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param(); + 
eltwise_param->set_operation(EltwiseParameter_EltwiseOp_SUM); + eltwise_param->set_coeff_blob(true); + eltwise_param->add_coeff(1); + eltwise_param->add_coeff(-0.5); + eltwise_param->add_coeff(2); + shared_ptr > layer( + new EltwiseLayer(layer_param)); + this->blob_bottom_vec_.push_back(this->blob_bottom_coeff_); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const int num = this->blob_top_->num(); + const int dim = count / num; + const Dtype* coeff_data = this->blob_bottom_coeff_->cpu_data(); + for (int n = 0; n < num; ++n) { + for (int d = 0; d < dim; ++d) { + Dtype sum = 0; + for (int i = 0; i < this->blob_bottom_vec_.size() - 1; ++i) { + const Dtype coeff = coeff_data[i * num + n] * eltwise_param->coeff(i); + sum += coeff * this->blob_bottom_vec_[i]->cpu_data()[n * dim + d]; + } + EXPECT_NEAR(data[n * dim + d], sum, 1e-4); + } + } +} + TYPED_TEST(EltwiseLayerTest, TestStableProdGradient) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -175,6 +212,26 @@ TYPED_TEST(EltwiseLayerTest, TestSumCoeffGradient) { this->blob_top_vec_); } +TYPED_TEST(EltwiseLayerTest, TestSumBlobCoeffGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param(); + eltwise_param->set_operation(EltwiseParameter_EltwiseOp_SUM); + eltwise_param->set_coeff_blob(true); + eltwise_param->add_coeff(1); + eltwise_param->add_coeff(-0.5); + eltwise_param->add_coeff(2); + EltwiseLayer layer(layer_param); + this->blob_bottom_vec_.push_back(this->blob_bottom_coeff_); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 1); + 
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 2); +} + TYPED_TEST(EltwiseLayerTest, TestMax) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; diff --git a/src/caffe/test/test_embed_layer.cpp b/src/caffe/test/test_embed_layer.cpp new file mode 100644 index 00000000000..7a4fb9800f2 --- /dev/null +++ b/src/caffe/test/test_embed_layer.cpp @@ -0,0 +1,183 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +#ifndef CPU_ONLY +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; +#endif + +template +class EmbedLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + protected: + EmbedLayerTest() + : blob_bottom_(new Blob(4, 1, 1, 1)), + blob_top_(new Blob()) { + // fill the values + FillerParameter filler_param; + UniformFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~EmbedLayerTest() { delete blob_bottom_; delete blob_top_; } + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(EmbedLayerTest, TestDtypesAndDevices); + +TYPED_TEST(EmbedLayerTest, TestSetUp) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + embed_param->set_num_output(10); + embed_param->set_input_dim(5); + shared_ptr > layer(new EmbedLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_top_->num_axes(), 5); + EXPECT_EQ(this->blob_top_->shape(0), 4); + EXPECT_EQ(this->blob_top_->shape(1), 1); + EXPECT_EQ(this->blob_top_->shape(2), 1); + 
EXPECT_EQ(this->blob_top_->shape(3), 1); + EXPECT_EQ(this->blob_top_->shape(4), 10); +} + +TYPED_TEST(EmbedLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + const int kNumOutput = 10; + const int kInputDim = 5; + embed_param->set_num_output(kNumOutput); + embed_param->set_input_dim(kInputDim); + embed_param->mutable_weight_filler()->set_type("uniform"); + embed_param->mutable_weight_filler()->set_min(-10); + embed_param->mutable_weight_filler()->set_max(10); + embed_param->set_bias_term(false); + shared_ptr > layer(new EmbedLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer->blobs().size()); + vector weight_shape(2); + weight_shape[0] = kInputDim; + weight_shape[1] = kNumOutput; + ASSERT_TRUE(weight_shape == layer->blobs()[0]->shape()); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + this->blob_bottom_->mutable_cpu_data()[i] = caffe_rng_rand() % kInputDim; + } + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + vector weight_offset(2, 0); + vector top_offset(5, 0); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + weight_offset[0] = static_cast(this->blob_bottom_->cpu_data()[i]); + weight_offset[1] = 0; + top_offset[0] = i; + top_offset[4] = 0; + for (int j = 0; j < kNumOutput; ++j) { + EXPECT_EQ(layer->blobs()[0]->data_at(weight_offset), + this->blob_top_->data_at(top_offset)); + ++top_offset[4]; + ++weight_offset[1]; + } + } +} + +TYPED_TEST(EmbedLayerTest, TestForwardWithBias) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + const int kNumOutput = 10; + const int kInputDim = 5; + embed_param->set_num_output(kNumOutput); + embed_param->set_input_dim(kInputDim); + embed_param->mutable_weight_filler()->set_type("uniform"); + 
embed_param->mutable_weight_filler()->set_min(-10); + embed_param->mutable_weight_filler()->set_max(10); + embed_param->mutable_bias_filler()->CopyFrom(embed_param->weight_filler()); + embed_param->set_bias_term(true); + shared_ptr > layer(new EmbedLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(2, layer->blobs().size()); + vector weight_shape(2); + weight_shape[0] = kInputDim; + weight_shape[1] = kNumOutput; + ASSERT_TRUE(weight_shape == layer->blobs()[0]->shape()); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + this->blob_bottom_->mutable_cpu_data()[i] = caffe_rng_rand() % kInputDim; + } + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + vector bias_offset(1, 0); + vector weight_offset(2, 0); + vector top_offset(5, 0); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + weight_offset[0] = static_cast(this->blob_bottom_->cpu_data()[i]); + weight_offset[1] = 0; + top_offset[0] = i; + top_offset[4] = 0; + bias_offset[0] = 0; + for (int j = 0; j < kNumOutput; ++j) { + EXPECT_EQ(layer->blobs()[0]->data_at(weight_offset) + + layer->blobs()[1]->data_at(bias_offset), + this->blob_top_->data_at(top_offset)); + ++top_offset[4]; + ++weight_offset[1]; + ++bias_offset[0]; + } + } +} + +TYPED_TEST(EmbedLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + embed_param->set_num_output(10); + embed_param->set_input_dim(5); + embed_param->set_bias_term(false); + embed_param->mutable_weight_filler()->set_type("uniform"); + embed_param->mutable_weight_filler()->set_min(-10); + embed_param->mutable_weight_filler()->set_max(10); + EmbedLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + this->blob_bottom_->mutable_cpu_data()[0] = 4; + this->blob_bottom_->mutable_cpu_data()[1] = 2; + this->blob_bottom_->mutable_cpu_data()[2] = 2; + this->blob_bottom_->mutable_cpu_data()[3] = 3; 
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, -2); +} + +TYPED_TEST(EmbedLayerTest, TestGradientWithBias) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + embed_param->set_num_output(10); + embed_param->set_input_dim(5); + embed_param->set_bias_term(true); + embed_param->mutable_weight_filler()->set_type("uniform"); + embed_param->mutable_weight_filler()->set_min(-10); + embed_param->mutable_weight_filler()->set_max(10); + embed_param->mutable_bias_filler()->CopyFrom(embed_param->weight_filler()); + EmbedLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + this->blob_bottom_->mutable_cpu_data()[0] = 4; + this->blob_bottom_->mutable_cpu_data()[1] = 2; + this->blob_bottom_->mutable_cpu_data()[2] = 2; + this->blob_bottom_->mutable_cpu_data()[3] = 3; + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, -2); +} + +} // namespace caffe diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index 8d3b3d1e987..c9b027f88cf 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -77,15 +77,13 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) { EXPECT_EQ(this->blob_top_data_->height(), height); EXPECT_EQ(this->blob_top_data_->width(), width); - EXPECT_EQ(this->blob_top_label_->num(), batch_size); - EXPECT_EQ(this->blob_top_label_->channels(), 1); - EXPECT_EQ(this->blob_top_label_->height(), 1); - EXPECT_EQ(this->blob_top_label_->width(), 1); - - EXPECT_EQ(this->blob_top_label2_->num(), batch_size); - EXPECT_EQ(this->blob_top_label2_->channels(), 1); - EXPECT_EQ(this->blob_top_label2_->height(), 1); - EXPECT_EQ(this->blob_top_label2_->width(), 1); + EXPECT_EQ(this->blob_top_label_->num_axes(), 2); + EXPECT_EQ(this->blob_top_label_->shape(0), batch_size); + EXPECT_EQ(this->blob_top_label_->shape(1), 1); + + 
EXPECT_EQ(this->blob_top_label2_->num_axes(), 2); + EXPECT_EQ(this->blob_top_label2_->shape(0), batch_size); + EXPECT_EQ(this->blob_top_label2_->shape(1), 1); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); diff --git a/src/caffe/test/test_lstm_layer.cpp b/src/caffe/test/test_lstm_layer.cpp new file mode 100644 index 00000000000..a0ce45f6383 --- /dev/null +++ b/src/caffe/test/test_lstm_layer.cpp @@ -0,0 +1,265 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/sequence_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class LSTMLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + LSTMLayerTest() : num_output_(7) { + blob_bottom_vec_.push_back(&blob_bottom_); + blob_bottom_vec_.push_back(&blob_bottom_flush_); + blob_top_vec_.push_back(&blob_top_); + unit_blob_bottom_vec_.push_back(&unit_blob_bottom_c_prev_); + unit_blob_bottom_vec_.push_back(&unit_blob_bottom_x_); + unit_blob_bottom_vec_.push_back(&unit_blob_bottom_flush_); + unit_blob_top_vec_.push_back(&unit_blob_top_c_); + unit_blob_top_vec_.push_back(&unit_blob_top_h_); + + ReshapeBlobs(1, 3); + + layer_param_.mutable_recurrent_param()->set_num_output(num_output_); + FillerParameter* weight_filler = + layer_param_.mutable_recurrent_param()->mutable_weight_filler(); + weight_filler->set_type("gaussian"); + weight_filler->set_std(0.2); + FillerParameter* bias_filler = + layer_param_.mutable_recurrent_param()->mutable_bias_filler(); + bias_filler->set_type("gaussian"); + bias_filler->set_std(0.1); + + layer_param_.set_phase(TEST); + } + + void ReshapeBlobs(int num_timesteps, int num_instances) { + blob_bottom_.Reshape(num_timesteps, num_instances, 3, 2); + vector shape(2); + shape[0] = num_timesteps; + shape[1] = num_instances; + blob_bottom_flush_.Reshape(shape); + 
shape.push_back(num_output_); + + shape[0] = 1; shape[1] = num_instances; shape[2] = 4 * num_output_; + unit_blob_bottom_x_.Reshape(shape); + shape[0] = 1; shape[1] = num_instances; shape[2] = num_output_; + unit_blob_bottom_c_prev_.Reshape(shape); + shape[0] = 1; shape[1] = 1; shape[2] = num_instances; + unit_blob_bottom_flush_.Reshape(shape); + + FillerParameter filler_param; + filler_param.set_min(-1); + filler_param.set_max(1); + UniformFiller filler(filler_param); + filler.Fill(&blob_bottom_); + filler.Fill(&unit_blob_bottom_c_prev_); + filler.Fill(&unit_blob_bottom_x_); + } + + int num_output_; + LayerParameter layer_param_; + Blob blob_bottom_; + Blob blob_bottom_flush_; + Blob blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; + + Blob unit_blob_bottom_flush_; + Blob unit_blob_bottom_c_prev_; + Blob unit_blob_bottom_x_; + Blob unit_blob_top_c_; + Blob unit_blob_top_h_; + vector*> unit_blob_bottom_vec_; + vector*> unit_blob_top_vec_; +}; + +TYPED_TEST_CASE(LSTMLayerTest, TestDtypesAndDevices); + +TYPED_TEST(LSTMLayerTest, TestSetUp) { + typedef typename TypeParam::Dtype Dtype; + LSTMLayer layer(this->layer_param_); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + vector expected_top_shape = this->blob_bottom_.shape(); + expected_top_shape.resize(3); + expected_top_shape[2] = this->num_output_; + EXPECT_TRUE(this->blob_top_.shape() == expected_top_shape); +} + +TYPED_TEST(LSTMLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + const int kNumTimesteps = 3; + const int num = this->blob_bottom_.shape(1); + this->ReshapeBlobs(kNumTimesteps, num); + + // Fill the flush blob with <0, 1, 1, ..., 1>, + // indicating a sequence that begins at the first timestep + // then continues for the rest of the sequence. + for (int t = 0; t < kNumTimesteps; ++t) { + for (int n = 0; n < num; ++n) { + this->blob_bottom_flush_.mutable_cpu_data()[t * num + n] = t > 0; + } + } + + // Process the full sequence in a single batch. 
+ FillerParameter filler_param; + filler_param.set_mean(0); + filler_param.set_std(1); + GaussianFiller sequence_filler(filler_param); + sequence_filler.Fill(&this->blob_bottom_); + shared_ptr > layer(new LSTMLayer(this->layer_param_)); + Caffe::set_random_seed(1701); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + LOG(INFO) << "Calling forward for full sequence LSTM"; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + + // Copy the inputs and outputs to reuse/check them later. + Blob bottom_copy(this->blob_bottom_.shape()); + bottom_copy.CopyFrom(this->blob_bottom_); + Blob top_copy(this->blob_top_.shape()); + top_copy.CopyFrom(this->blob_top_); + + // Process the batch one timestep at a time; + // check that we get the same result. + this->ReshapeBlobs(1, num); + layer.reset(new LSTMLayer(this->layer_param_)); + Caffe::set_random_seed(1701); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + const int bottom_count = this->blob_bottom_.count(); + const int top_count = this->blob_top_.count(); + const Dtype kEpsilon = 1e-5; + for (int t = 0; t < kNumTimesteps; ++t) { + caffe_copy(bottom_count, bottom_copy.cpu_data() + t * bottom_count, + this->blob_bottom_.mutable_cpu_data()); + for (int n = 0; n < num; ++n) { + this->blob_bottom_flush_.mutable_cpu_data()[n] = t > 0; + } + LOG(INFO) << "Calling forward for LSTM timestep " << t; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < top_count; ++i) { + ASSERT_LT(t * top_count + i, top_copy.count()); + EXPECT_NEAR(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i], kEpsilon) + << "t = " << t << "; i = " << i; + } + } + + // Process the batch one timestep at a time with all flush blobs set to 0. + // Check that we get a different result, except in the first timestep. 
+ Caffe::set_random_seed(1701); + layer.reset(new LSTMLayer(this->layer_param_)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + for (int t = 0; t < kNumTimesteps; ++t) { + caffe_copy(bottom_count, bottom_copy.cpu_data() + t * bottom_count, + this->blob_bottom_.mutable_cpu_data()); + for (int n = 0; n < num; ++n) { + this->blob_bottom_flush_.mutable_cpu_data()[n] = 0; + } + LOG(INFO) << "Calling forward for LSTM timestep " << t; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < top_count; ++i) { + if (t == 0) { + EXPECT_NEAR(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i], kEpsilon) + << "t = " << t << "; i = " << i; + } else { + EXPECT_NE(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i]) + << "t = " << t << "; i = " << i; + } + } + } +} + +TYPED_TEST(LSTMLayerTest, TestLSTMUnitSetUp) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LSTMUnitLayer layer(layer_param); + layer.SetUp(this->unit_blob_bottom_vec_, this->unit_blob_top_vec_); + const int num_axes = this->unit_blob_bottom_c_prev_.num_axes(); + ASSERT_EQ(num_axes, this->unit_blob_top_c_.num_axes()); + ASSERT_EQ(num_axes, this->unit_blob_top_h_.num_axes()); + for (int i = 0; i < num_axes; ++i) { + EXPECT_EQ(this->unit_blob_bottom_c_prev_.shape(i), + this->unit_blob_top_c_.shape(i)); + EXPECT_EQ(this->unit_blob_bottom_c_prev_.shape(i), + this->unit_blob_top_h_.shape(i)); + } +} + +TYPED_TEST(LSTMLayerTest, TestLSTMUnitGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LSTMUnitLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + Dtype* flush_data = this->blob_bottom_flush_.mutable_cpu_data(); + flush_data[0] = 0; + flush_data[1] = 0; + flush_data[2] = 0; + checker.CheckGradientExhaustive(&layer, this->unit_blob_bottom_vec_, + this->unit_blob_top_vec_, 0); + checker.CheckGradientExhaustive(&layer, this->unit_blob_bottom_vec_, + 
this->unit_blob_top_vec_, 1); +} + +TYPED_TEST(LSTMLayerTest, TestLSTMUnitGradientNonZeroFlush) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LSTMUnitLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + Dtype* flush_data = this->blob_bottom_flush_.mutable_cpu_data(); + flush_data[0] = 1; + flush_data[1] = 0; + flush_data[2] = 1; + checker.CheckGradientExhaustive(&layer, this->unit_blob_bottom_vec_, + this->unit_blob_top_vec_, 0); + checker.CheckGradientExhaustive(&layer, this->unit_blob_bottom_vec_, + this->unit_blob_top_vec_, 1); +} + +TYPED_TEST(LSTMLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LSTMLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(LSTMLayerTest, TestGradientNonZeroFlush) { + typedef typename TypeParam::Dtype Dtype; + LSTMLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_flush_.count(); ++i) { + this->blob_bottom_flush_.mutable_cpu_data()[i] = i > 2; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(LSTMLayerTest, TestGradientNonZeroFlushBufferSize2) { + typedef typename TypeParam::Dtype Dtype; + this->ReshapeBlobs(2, 2); + // fill the values + FillerParameter filler_param; + UniformFiller filler(filler_param); + filler.Fill(&this->blob_bottom_); + LSTMLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_flush_.count(); ++i) { + this->blob_bottom_flush_.mutable_cpu_data()[i] = i > 2; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +} // namespace caffe diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 1680a3f28d5..c65116d7eb1 100644 --- a/src/caffe/test/test_net.cpp +++ 
b/src/caffe/test/test_net.cpp @@ -63,18 +63,19 @@ class NetTest : public MultiDeviceTest { " name: 'data' " " type: 'DummyData' " " dummy_data_param { " - " num: 5 " - " channels: 2 " - " height: 3 " - " width: 4 " - " num: 5 " - " channels: 1 " - " height: 1 " - " width: 1 " + " shape { " + " dim: 5 " + " dim: 2 " + " dim: 3 " + " dim: 4 " + " } " " data_filler { " " type: 'gaussian' " " std: 0.01 " " } " + " shape { " + " dim: 5 " + " } " " data_filler { " " type: 'constant' " " value: 0 " @@ -1006,11 +1007,10 @@ TYPED_TEST(NetTest, TestSharedWeightsUpdate) { EXPECT_EQ(this->net_->layer_names()[2], "innerproduct2"); Blob* ip1_weights = this->net_->layers()[1]->blobs()[0].get(); Blob* ip2_weights = this->net_->layers()[2]->blobs()[0].get(); - // Check that data blobs of shared weights share the same location in memory. + // Check that data and diff blobs of shared weights share the same memory + // locations. EXPECT_EQ(ip1_weights->cpu_data(), ip2_weights->cpu_data()); - // Check that diff blobs of shared weights are at different locations in - // memory. (The diffs should be accumulated at update time.) - EXPECT_NE(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); + EXPECT_EQ(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); this->net_->Forward(bottom); this->net_->Backward(); // Compute the expected update as the data minus the two diffs. @@ -1023,11 +1023,7 @@ TYPED_TEST(NetTest, TestSharedWeightsUpdate) { // Make sure the diffs are non-trivial. 
for (int i = 0; i < count; ++i) { EXPECT_NE(0, ip1_weights->cpu_diff()[i]); - EXPECT_NE(0, ip2_weights->cpu_diff()[i]); - EXPECT_NE(ip1_weights->cpu_diff()[i], ip2_weights->cpu_diff()[i]); } - caffe_axpy(count, Dtype(1), ip2_weights->cpu_diff(), - shared_params.mutable_cpu_diff()); caffe_axpy(count, Dtype(-1), shared_params.cpu_diff(), shared_params.mutable_cpu_data()); const Dtype* expected_updated_params = shared_params.cpu_data(); @@ -1064,8 +1060,8 @@ TYPED_TEST(NetTest, TestSharedWeightsUpdate) { EXPECT_NE(0, ip1_weights->cpu_diff()[i]); EXPECT_NE(0, ip2_weights->cpu_diff()[i]); EXPECT_NE(ip1_weights->cpu_diff()[i], ip2_weights->cpu_diff()[i]); - EXPECT_EQ(ip1_weights->cpu_diff()[i] + ip2_weights->cpu_diff()[i], - shared_params.cpu_diff()[i]); + EXPECT_FLOAT_EQ(ip1_weights->cpu_diff()[i] + ip2_weights->cpu_diff()[i], + shared_params.cpu_diff()[i]); } caffe_axpy(count, Dtype(-1), ip1_weights->cpu_diff(), unshared_params1.mutable_cpu_data()); @@ -1095,11 +1091,10 @@ TYPED_TEST(NetTest, TestSharedWeightsResume) { EXPECT_EQ(this->net_->layer_names()[2], "innerproduct2"); Blob* ip1_weights = this->net_->layers()[1]->blobs()[0].get(); Blob* ip2_weights = this->net_->layers()[2]->blobs()[0].get(); - // Check that data blobs of shared weights share the same location in memory. + // Check that data and diff blobs of shared weights share the same memory + // locations. EXPECT_EQ(ip1_weights->cpu_data(), ip2_weights->cpu_data()); - // Check that diff blobs of shared weights are at different locations in - // memory. (The diffs should be accumulated at update time.) 
- EXPECT_NE(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); + EXPECT_EQ(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); this->net_->ForwardBackward(bottom); this->net_->Update(); Blob shared_params; @@ -1122,14 +1117,13 @@ TYPED_TEST(NetTest, TestSharedWeightsResume) { ASSERT_FALSE(NULL == ip1_weights); ASSERT_FALSE(NULL == ip2_weights); EXPECT_NE(ip1_weights, ip2_weights); - // Check that data blobs of shared weights share the same location in memory. + // Check that data and diff blobs of shared weights share the same memory + // locations. EXPECT_EQ(ip1_weights->cpu_data(), ip2_weights->cpu_data()); + EXPECT_EQ(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); for (int i = 0; i < count; ++i) { EXPECT_FLOAT_EQ(shared_params.cpu_data()[i], ip1_weights->cpu_data()[i]); } - // Check that diff blobs of shared weights are at different locations in - // memory. (The diffs should be accumulated at update time.) - EXPECT_NE(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); } TYPED_TEST(NetTest, TestParamPropagateDown) { diff --git a/src/caffe/test/test_rnn_layer.cpp b/src/caffe/test/test_rnn_layer.cpp new file mode 100644 index 00000000000..eab9269ce77 --- /dev/null +++ b/src/caffe/test/test_rnn_layer.cpp @@ -0,0 +1,196 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/sequence_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class RNNLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + RNNLayerTest() : num_output_(7) { + blob_bottom_vec_.push_back(&blob_bottom_); + blob_bottom_vec_.push_back(&blob_bottom_flush_); + blob_top_vec_.push_back(&blob_top_); + + ReshapeBlobs(1, 3); + + layer_param_.mutable_recurrent_param()->set_num_output(num_output_); + FillerParameter* weight_filler = + 
layer_param_.mutable_recurrent_param()->mutable_weight_filler(); + weight_filler->set_type("gaussian"); + weight_filler->set_std(0.2); + FillerParameter* bias_filler = + layer_param_.mutable_recurrent_param()->mutable_bias_filler(); + bias_filler->set_type("gaussian"); + bias_filler->set_std(0.1); + + layer_param_.set_phase(TEST); + } + + void ReshapeBlobs(int num_timesteps, int num_instances) { + blob_bottom_.Reshape(num_timesteps, num_instances, 3, 2); + vector shape(2); + shape[0] = num_timesteps; + shape[1] = num_instances; + blob_bottom_flush_.Reshape(shape); + + FillerParameter filler_param; + filler_param.set_min(-1); + filler_param.set_max(1); + UniformFiller filler(filler_param); + filler.Fill(&blob_bottom_); + } + + int num_output_; + LayerParameter layer_param_; + Blob blob_bottom_; + Blob blob_bottom_flush_; + Blob blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(RNNLayerTest, TestDtypesAndDevices); + +TYPED_TEST(RNNLayerTest, TestSetUp) { + typedef typename TypeParam::Dtype Dtype; + RNNLayer layer(this->layer_param_); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + vector expected_top_shape = this->blob_bottom_.shape(); + expected_top_shape.resize(3); + expected_top_shape[2] = this->num_output_; + EXPECT_TRUE(this->blob_top_.shape() == expected_top_shape); +} + +TYPED_TEST(RNNLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + const int kNumTimesteps = 3; + const int num = this->blob_bottom_.shape(1); + this->ReshapeBlobs(kNumTimesteps, num); + + // Fill the flush blob with <0, 1, 1, ..., 1>, + // indicating a sequence that begins at the first timestep + // then continues for the rest of the sequence. + for (int t = 0; t < kNumTimesteps; ++t) { + for (int n = 0; n < num; ++n) { + this->blob_bottom_flush_.mutable_cpu_data()[t * num + n] = t > 0; + } + } + + // Process the full sequence in a single batch. 
+ FillerParameter filler_param; + filler_param.set_mean(0); + filler_param.set_std(1); + GaussianFiller sequence_filler(filler_param); + sequence_filler.Fill(&this->blob_bottom_); + shared_ptr > layer(new RNNLayer(this->layer_param_)); + Caffe::set_random_seed(1701); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + LOG(INFO) << "Calling forward for full sequence RNN"; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + + // Copy the inputs and outputs to reuse/check them later. + Blob bottom_copy(this->blob_bottom_.shape()); + bottom_copy.CopyFrom(this->blob_bottom_); + Blob top_copy(this->blob_top_.shape()); + top_copy.CopyFrom(this->blob_top_); + + // Process the batch one timestep at a time; + // check that we get the same result. + this->ReshapeBlobs(1, num); + layer.reset(new RNNLayer(this->layer_param_)); + Caffe::set_random_seed(1701); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + const int bottom_count = this->blob_bottom_.count(); + const int top_count = this->blob_top_.count(); + const Dtype kEpsilon = 1e-5; + for (int t = 0; t < kNumTimesteps; ++t) { + caffe_copy(bottom_count, bottom_copy.cpu_data() + t * bottom_count, + this->blob_bottom_.mutable_cpu_data()); + for (int n = 0; n < num; ++n) { + this->blob_bottom_flush_.mutable_cpu_data()[n] = t > 0; + } + LOG(INFO) << "Calling forward for RNN timestep " << t; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < top_count; ++i) { + ASSERT_LT(t * top_count + i, top_copy.count()); + EXPECT_NEAR(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i], kEpsilon) + << "t = " << t << "; i = " << i; + } + } + + // Process the batch one timestep at a time with all flush blobs set to 0. + // Check that we get a different result, except in the first timestep. 
+ Caffe::set_random_seed(1701); + layer.reset(new RNNLayer(this->layer_param_)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + for (int t = 0; t < kNumTimesteps; ++t) { + caffe_copy(bottom_count, bottom_copy.cpu_data() + t * bottom_count, + this->blob_bottom_.mutable_cpu_data()); + for (int n = 0; n < num; ++n) { + this->blob_bottom_flush_.mutable_cpu_data()[n] = 0; + } + LOG(INFO) << "Calling forward for RNN timestep " << t; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < top_count; ++i) { + if (t == 0) { + EXPECT_NEAR(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i], kEpsilon) + << "t = " << t << "; i = " << i; + } else { + EXPECT_NE(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i]) + << "t = " << t << "; i = " << i; + } + } + } +} + +TYPED_TEST(RNNLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + RNNLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(RNNLayerTest, TestGradientNonZeroFlush) { + typedef typename TypeParam::Dtype Dtype; + RNNLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_flush_.count(); ++i) { + this->blob_bottom_flush_.mutable_cpu_data()[i] = i > 2; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(RNNLayerTest, TestGradientNonZeroFlushBufferSize2) { + typedef typename TypeParam::Dtype Dtype; + this->ReshapeBlobs(2, 2); + // fill the values + FillerParameter filler_param; + UniformFiller filler(filler_param); + filler.Fill(&this->blob_bottom_); + RNNLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_flush_.count(); ++i) { + this->blob_bottom_flush_.mutable_cpu_data()[i] = i > 2; + } + 
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +} // namespace caffe diff --git a/src/caffe/test/test_slice_layer.cpp b/src/caffe/test/test_slice_layer.cpp index 395be280089..ccd03646d19 100644 --- a/src/caffe/test/test_slice_layer.cpp +++ b/src/caffe/test/test_slice_layer.cpp @@ -62,7 +62,7 @@ TYPED_TEST_CASE(SliceLayerTest, TestDtypesAndDevices); TYPED_TEST(SliceLayerTest, TestSetupNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_1_); EXPECT_EQ(this->blob_bottom_->num(), 3 * this->blob_top_0_->num()); @@ -91,7 +91,7 @@ TYPED_TEST(SliceLayerTest, TestSetupChannels) { TYPED_TEST(SliceLayerTest, TestSliceAcrossNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_0_); const int top_num = this->blob_bottom_->num() / 2; @@ -166,7 +166,7 @@ TYPED_TEST(SliceLayerTest, TestGradientAcrossNum) { // Gradient checks are slow; reduce blob size. 
this->ReduceBottomBlobSize(); LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, diff --git a/src/caffe/test/test_solver.cpp b/src/caffe/test/test_solver.cpp index 1c2c9bbb740..ceabc9cdd2c 100644 --- a/src/caffe/test/test_solver.cpp +++ b/src/caffe/test/test_solver.cpp @@ -55,14 +55,15 @@ TYPED_TEST(SolverTest, TestInitTrainTestNets) { " name: 'data' " " type: 'DummyData' " " dummy_data_param { " - " num: 5 " - " channels: 3 " - " height: 10 " - " width: 10 " - " num: 5 " - " channels: 1 " - " height: 1 " - " width: 1 " + " shape { " + " dim: 5 " + " dim: 2 " + " dim: 3 " + " dim: 4 " + " } " + " shape { " + " dim: 5 " + " } " " } " " top: 'data' " " top: 'label' " diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index b243a9804ec..77ef7f257f4 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -252,11 +252,11 @@ void hdf5_load_nd_dataset_helper( CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_; CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data"; - blob->Reshape( - dims[0], - (dims.size() > 1) ? dims[1] : 1, - (dims.size() > 2) ? dims[2] : 1, - (dims.size() > 3) ? dims[3] : 1); + vector blob_dims(dims.size()); + for (int i = 0; i < dims.size(); ++i) { + blob_dims[i] = dims[i]; + } + blob->Reshape(blob_dims); } template <>