vinniefalco
diff --git a/‎Makefile
+4 b/‎Makefile
+4
diff --git a/‎db/file_indexer.cc
+202 b/‎db/file_indexer.cc
+202
diff --git a/‎db/file_indexer.h
+129 b/‎db/file_indexer.h
+129
@@ -106,6 +106,7 @@ TESTS = \
 	backupable_db_test \
 	version_edit_test \
 	version_set_test \
+	file_indexer_test \
 	write_batch_test\
 	deletefile_test \
 	table_test \
@@ -376,6 +377,9 @@ version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
 version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
 
+# Link the file_indexer_test binary from its object file, the library objects
+# and the test harness.
+file_indexer_test: db/file_indexer_test.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) db/file_indexer_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
+
 reduce_levels_test: tools/reduce_levels_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) tools/reduce_levels_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
 
 
@@ -0,0 +1,202 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/file_indexer.h"
+#include <algorithm>
+#include "rocksdb/comparator.h"
+#include "db/version_edit.h"
+
+namespace rocksdb {
+
+// Builds an indexer for `num_levels` levels. Every level starts out with an
+// empty per-file index and a right bound of -1 (the value UpdateIndex() also
+// produces for a level without files). `ucmp` is stored for the user-key
+// comparisons performed later by UpdateIndex().
+FileIndexer::FileIndexer(const uint32_t num_levels, const Comparator* ucmp)
+    : num_levels_(num_levels),
+      ucmp_(ucmp),
+      next_level_index_(num_levels),
+      level_rb_(num_levels, -1) {}
+
+
+// Reports how many per-level index slots this indexer holds.
+uint32_t FileIndexer::NumLevelIndex() {
+  const auto slot_count = next_level_index_.size();
+  return static_cast<uint32_t>(slot_count);
+}
+
+// Returns the number of index entries currently stored for `level`, or 0 when
+// `level` is outside the range of levels tracked by this indexer.
+uint32_t FileIndexer::LevelIndexSize(uint32_t level) {
+  // std::vector::operator[] performs no bounds check, so an out-of-range level
+  // would be undefined behavior. A level that is not tracked has no entries.
+  if (level >= next_level_index_.size()) {
+    return 0;
+  }
+  return static_cast<uint32_t>(next_level_index_[level].size());
+}
+
+// Translates the outcome of comparing a key against one file's boundaries
+// into the slice of the next level that still has to be searched.
+//
+//   level, file_index - the file on the current level the key was compared to
+//   cmp_smallest      - key vs. that file's smallest key (<0, 0 or >0)
+//   cmp_largest       - key vs. that file's largest key (<0, 0 or >0)
+//   left_bound,
+//   right_bound       - receive the inclusive candidate range in level + 1;
+//                       the range may be empty (left_bound == right_bound + 1)
+//
+// The last level has no next level, so the empty range [0, -1] is returned.
+void FileIndexer::GetNextLevelIndex(
+    const uint32_t level, const uint32_t file_index, const int cmp_smallest,
+    const int cmp_largest, int32_t* left_bound, int32_t* right_bound) {
+  assert(level > 0);
+
+  const uint32_t bottom_level = num_levels_ - 1;
+  if (level == bottom_level) {
+    // Nothing below the bottom level: hand back an empty range.
+    *left_bound = 0;
+    *right_bound = -1;
+    return;
+  }
+
+  assert(level < bottom_level);
+  assert(static_cast<int32_t>(file_index) <= level_rb_[level]);
+
+  const auto& level_units = next_level_index_[level];
+  const auto& unit = level_units[file_index];
+
+  if (cmp_smallest <= 0) {
+    // Key is at or before the file's smallest key.
+    if (cmp_smallest < 0) {
+      // Strictly before: the left bound comes from the previous file on this
+      // level (or 0 when there is none).
+      *left_bound = (level > 0 && file_index > 0)
+                        ? level_units[file_index - 1].largest_lb
+                        : 0;
+    } else {
+      *left_bound = unit.smallest_lb;
+    }
+    *right_bound = unit.smallest_rb;
+  } else if (cmp_largest < 0) {
+    // Key lies strictly between the file's smallest and largest keys.
+    *left_bound = unit.smallest_lb;
+    *right_bound = unit.largest_rb;
+  } else {
+    // Key is at or after the file's largest key.
+    *left_bound = unit.largest_lb;
+    *right_bound =
+        (cmp_largest == 0) ? unit.largest_rb : level_rb_[level + 1];
+  }
+
+  assert(*left_bound >= 0);
+  assert(*left_bound <= *right_bound + 1);
+  assert(*right_bound <= level_rb_[level + 1]);
+}
+
+// Discards the stored per-file entries of every level. The level slots
+// themselves are kept, so NumLevelIndex() is unaffected.
+void FileIndexer::ClearIndex() {
+  // Slot 0 is never populated by UpdateIndex(), so clearing it along with the
+  // others is a no-op.
+  for (auto& level_units : next_level_index_) {
+    level_units.clear();
+  }
+}
+
+// Rebuilds the search hints for every pair of adjacent levels (L, L + 1) with
+// L >= 1, and records the index of the last file of each level from 1 up.
+//
+// `files` points at an array of per-level file lists indexed by level; entries
+// 1 .. num_levels_ - 1 are read. A null `files` leaves the index untouched.
+// NOTE(review): the merge-style scans in CalculateLB/CalculateRB presumably
+// rely on each level's files being sorted by key -- confirm against the caller.
+void FileIndexer::UpdateIndex(std::vector<FileMetaData*>* const files) {
+  if (files == nullptr) {
+    return;
+  }
+  // With no levels there is nothing to index. Bail out before the unsigned
+  // `num_levels_ - 1` below can wrap around and index out of bounds.
+  if (num_levels_ == 0) {
+    return;
+  }
+
+  const uint32_t last_level = num_levels_ - 1;
+
+  // L1 - Ln-1
+  for (uint32_t level = 1; level < last_level; ++level) {
+    const auto& upper_files = files[level];
+    const auto& lower_files = files[level + 1];
+    const int32_t upper_size = static_cast<int32_t>(upper_files.size());
+    // Signed arithmetic: an empty level yields -1 without relying on an
+    // unsigned wrap-around followed by a narrowing conversion.
+    level_rb_[level] = upper_size - 1;
+
+    // Resize before the emptiness check so that entries left over from an
+    // earlier UpdateIndex() call are dropped when the level is now empty.
+    auto& index = next_level_index_[level];
+    index.resize(upper_size);
+    if (upper_size == 0) {
+      continue;
+    }
+
+    // Left bound for keys past an upper file's smallest key.
+    CalculateLB(upper_files, lower_files, &index,
+        [this](const FileMetaData* a, const FileMetaData* b) -> int {
+          return ucmp_->Compare(a->smallest.user_key(), b->largest.user_key());
+        },
+        [](IndexUnit* unit, int32_t f_idx) {
+          unit->smallest_lb = f_idx;
+        });
+    // Left bound for keys past an upper file's largest key.
+    CalculateLB(upper_files, lower_files, &index,
+        [this](const FileMetaData* a, const FileMetaData* b) -> int {
+          return ucmp_->Compare(a->largest.user_key(), b->largest.user_key());
+        },
+        [](IndexUnit* unit, int32_t f_idx) {
+          unit->largest_lb = f_idx;
+        });
+    // Right bound for keys before an upper file's smallest key.
+    CalculateRB(upper_files, lower_files, &index,
+        [this](const FileMetaData* a, const FileMetaData* b) -> int {
+          return ucmp_->Compare(a->smallest.user_key(), b->smallest.user_key());
+        },
+        [](IndexUnit* unit, int32_t f_idx) {
+          unit->smallest_rb = f_idx;
+        });
+    // Right bound for keys before an upper file's largest key.
+    CalculateRB(upper_files, lower_files, &index,
+        [this](const FileMetaData* a, const FileMetaData* b) -> int {
+          return ucmp_->Compare(a->largest.user_key(), b->smallest.user_key());
+        },
+        [](IndexUnit* unit, int32_t f_idx) {
+          unit->largest_rb = f_idx;
+        });
+  }
+  level_rb_[last_level] = static_cast<int32_t>(files[last_level].size()) - 1;
+}
+
+// Forward merge-style pass that fills one left-bound field of every entry in
+// `index`.
+//
+// Both file lists are walked from the front. `cmp_op(upper, lower)` decides
+// how the two cursors move:
+//   > 0 : the lower file is skipped,
+//   < 0 : the current lower position is stored for the upper file through
+//         `set_index`, and the upper cursor advances,
+//   = 0 : the position is stored as well, then both cursors advance.
+// Upper files not yet visited when the lower list runs out receive the lower
+// level's size, i.e. one past its last file.
+void FileIndexer::CalculateLB(const std::vector<FileMetaData*>& upper_files,
+    const std::vector<FileMetaData*>& lower_files,
+    std::vector<IndexUnit>* index,
+    std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
+    std::function<void(IndexUnit*, int32_t)> set_index) {
+  const int32_t num_upper = upper_files.size();
+  const int32_t num_lower = lower_files.size();
+
+  int32_t u = 0;
+  int32_t l = 0;
+  while (u < num_upper && l < num_lower) {
+    const int order = cmp_op(upper_files[u], lower_files[l]);
+    if (order > 0) {
+      // This lower file cannot serve as the bound; try the next one.
+      ++l;
+      continue;
+    }
+    // order <= 0: the current lower file is the bound for this upper file.
+    set_index(&(*index)[u], l);
+    ++u;
+    if (order == 0) {
+      ++l;
+    }
+  }
+
+  // No lower files left for the remaining upper files.
+  for (; u < num_upper; ++u) {
+    set_index(&(*index)[u], num_lower);
+  }
+}
+
+// Backward merge-style pass that fills one right-bound field of every entry
+// in `index`.
+//
+// Both file lists are walked from the back. `cmp_op(upper, lower)` decides
+// how the two cursors move:
+//   < 0 : the lower file is skipped,
+//   > 0 : the current lower position is stored for the upper file through
+//         `set_index`, and the upper cursor steps back,
+//   = 0 : the position is stored as well, then both cursors step back.
+// Upper files not yet visited when the lower list runs out receive -1.
+void FileIndexer::CalculateRB(const std::vector<FileMetaData*>& upper_files,
+    const std::vector<FileMetaData*>& lower_files,
+    std::vector<IndexUnit>* index,
+    std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
+    std::function<void(IndexUnit*, int32_t)> set_index) {
+  const int32_t num_upper = upper_files.size();
+  const int32_t num_lower = lower_files.size();
+
+  int32_t u = num_upper - 1;
+  int32_t l = num_lower - 1;
+  while (u >= 0 && l >= 0) {
+    const int order = cmp_op(upper_files[u], lower_files[l]);
+    if (order < 0) {
+      // This lower file cannot serve as the bound; try the previous one.
+      --l;
+      continue;
+    }
+    // order >= 0: the current lower file is the bound for this upper file.
+    set_index(&(*index)[u], l);
+    --u;
+    if (order == 0) {
+      --l;
+    }
+  }
+
+  // No lower files left for the remaining upper files.
+  for (; u >= 0; --u) {
+    set_index(&(*index)[u], -1);
+  }
+}
+
+}  // namespace rocksdb
@@ -0,0 +1,129 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <vector>
+
+namespace rocksdb {
+
+class Comparator;
+class FileMetaData;
+
+// FileIndexer precomputes, for every file on a level, which slice of the next
+// level can still contain a key once that key has been compared against the
+// file's smallest and largest keys.
+//
+// Background: the file tree in a Version is prebuilt and the key range of each
+// file is known. Version::Get() binary-searches a level for a candidate file
+// and then compares the target key with that file's smallest and largest keys
+// to see whether the key can be in the file. The results of these comparisons
+// can be reused beyond that check. With some precalculated knowledge, every
+// comparison already made serves as a hint for the next search:
+//  - a key that compared smaller than a file's smallest or largest key gives a
+//    right bound for the next binary search;
+//  - a key that compared larger than a file's smallest or largest key gives a
+//    left bound for the next binary search.
+// These hints can greatly reduce the range of the binary search, especially
+// on the bottom levels, given that one file most likely overlaps with only N
+// files of the level below (where N is max_bytes_for_level_multiplier). So on
+// level L only ~N files are examined instead of the N^L files of the naive
+// approach.
+class FileIndexer {
+ public:
+  FileIndexer(const uint32_t num_levels, const Comparator* ucmp);
+
+  // Number of per-level index slots.
+  uint32_t NumLevelIndex();
+
+  // Number of index entries held for `level`.
+  uint32_t LevelIndexSize(uint32_t level);
+
+  // Returns the range of file indexes on the next level that has to be
+  // searched for a key, based on how the key compared with the smallest and
+  // largest keys of the current file, identified by `level` and `file_index`.
+  // When *left_bound <= *right_bound, both are valid indexes into the next
+  // level's file list.
+  void GetNextLevelIndex(
+    const uint32_t level, const uint32_t file_index, const int cmp_smallest,
+    const int cmp_largest, int32_t* left_bound, int32_t* right_bound);
+
+  // Drops the stored per-file entries of all levels.
+  void ClearIndex();
+
+  // Rebuilds the index from `files`, an array of per-level file lists.
+  void UpdateIndex(std::vector<FileMetaData*>* const files);
+
+  enum {
+    // Largest value representable by int32_t.
+    kLevelMaxIndex = std::numeric_limits<int32_t>::max()
+  };
+
+ private:
+  const uint32_t num_levels_;
+  const Comparator* ucmp_;
+
+  // Search hints attached to one file of an upper level.
+  //
+  // During file search a key is compared against the smallest and largest
+  // keys of a FileMetaData. There are 3 possible outcomes:
+  // (1) The key is smaller than smallest, implying it is also smaller than
+  //     largest. A precalculated index based on "smallest < smallest" can be
+  //     used to provide the right bound.
+  // (2) The key is in between smallest and largest.
+  //     (a) A precalculated index based on "smallest > largest" can be used
+  //         to provide the left bound.
+  //     (b) A precalculated index based on "largest < smallest" can be used
+  //         to provide the right bound.
+  // (3) The key is larger than largest, implying it is also larger than
+  //     smallest. A precalculated index based on "largest > largest" can be
+  //     used to provide the left bound.
+  //
+  // As a result we need to:
+  //  - compare the smallest (<=) and largest keys of the upper level file
+  //    with the smallest key of lower level files to get a right bound;
+  //  - compare the smallest (>=) and largest keys of the upper level file
+  //    with the largest key of lower level files to get a left bound.
+  //
+  // Example:
+  //    level 1:              [50 - 60]
+  //    level 2:        [1 - 40], [45 - 55], [58 - 80]
+  // Key 35 compares less than 50: the 3rd file on level 2 can be skipped
+  // according to rule (1). LB = 0, RB = 1.
+  // Key 53 sits between 50 and 60: the 1st file on level 2 can be skipped
+  // according to rule (2)-a, but the 3rd file cannot be skipped because 60 is
+  // greater than 58. LB = 1, RB = 2.
+  // Key 70 compares larger than 60: the 1st and 2nd files can be skipped
+  // according to rule (3). LB = 2, RB = 2.
+  struct IndexUnit {
+    IndexUnit()
+        : smallest_lb(0),
+          largest_lb(0),
+          smallest_rb(-1),
+          largest_rb(-1) {}
+
+    // Leftmost lower-level file that may contain a key which compares greater
+    // than the smallest key of the upper-level FileMetaData.
+    int32_t smallest_lb;
+    // Leftmost lower-level file that may contain a key which compares greater
+    // than the largest key of the upper-level FileMetaData.
+    int32_t largest_lb;
+    // Rightmost lower-level file that may contain a key which compares smaller
+    // than the smallest key of the upper-level FileMetaData.
+    int32_t smallest_rb;
+    // Rightmost lower-level file that may contain a key which compares smaller
+    // than the largest key of the upper-level FileMetaData.
+    int32_t largest_rb;
+  };
+
+  // Three-way comparison between an upper-level and a lower-level file.
+  using FileCompareFn =
+      std::function<int(const FileMetaData*, const FileMetaData*)>;
+  // Stores a lower-level file index into one field of an IndexUnit.
+  using BoundSetterFn = std::function<void(IndexUnit*, int32_t)>;
+
+  // Fills one left-bound field of every entry in `index`.
+  void CalculateLB(const std::vector<FileMetaData*>& upper_files,
+                   const std::vector<FileMetaData*>& lower_files,
+                   std::vector<IndexUnit>* index,
+                   FileCompareFn cmp_op,
+                   BoundSetterFn set_index);
+
+  // Fills one right-bound field of every entry in `index`.
+  void CalculateRB(const std::vector<FileMetaData*>& upper_files,
+                   const std::vector<FileMetaData*>& lower_files,
+                   std::vector<IndexUnit>* index,
+                   FileCompareFn cmp_op,
+                   BoundSetterFn set_index);
+
+  // next_level_index_[level][i] holds the hints for file i of `level`.
+  std::vector<std::vector<IndexUnit>> next_level_index_;
+  // level_rb_[level] is the index of the last file on `level`, -1 if none.
+  std::vector<int32_t> level_rb_;
+};
+
+}  // namespace rocksdb