Project-OSRM · DennisOSRM · Jun 15, 2014 · Jun 7, 2014 · Jun 2, 2014 · Jun 5, 2014
diff --git a/DataStructures/RangeTable.h b/DataStructures/RangeTable.h
@@ -0,0 +1,231 @@
+#ifndef __RANGE_TABLE_H__
+#define __RANGE_TABLE_H__
+
+#include "SharedMemoryFactory.h"
+#include "SharedMemoryVectorWrapper.h"
+
+#include <boost/range/irange.hpp>
+
+#include <fstream>
+#include <vector>
+#include <array>
+
+/*
+ * Forward declarations: the friend declarations inside RangeTable refer to
+ * these operator templates, so they must be declared before the class.
+ */
+
+template<unsigned BLOCK_SIZE=16, bool USE_SHARED_MEMORY = false> class RangeTable;
+
+template<unsigned BLOCK_SIZE, bool USE_SHARED_MEMORY>
+std::ostream& operator<<(std::ostream &out, const RangeTable<BLOCK_SIZE, USE_SHARED_MEMORY> &table);
+
+template<unsigned BLOCK_SIZE, bool USE_SHARED_MEMORY>
+std::istream& operator>>(std::istream &in, RangeTable<BLOCK_SIZE, USE_SHARED_MEMORY> &table);
+
+/**
+ * Stores adjacent ranges in a compressed format.
+ *
+ * Range i covers [sum of lengths[0..i-1], sum of lengths[0..i]).
+ * Maximum supported length of a range is 255 (lengths are stored in one byte).
+ *
+ * Note: BLOCK_SIZE is the number of differentially encoded values per block.
+ * Each block consists of one absolute offset and BLOCK_SIZE one-byte lengths,
+ * so it describes BLOCK_SIZE + 1 ranges in sizeof(unsigned) + BLOCK_SIZE bytes.
+ */
+template<unsigned BLOCK_SIZE, bool USE_SHARED_MEMORY>
+class RangeTable
+{
+public:
+    typedef std::array<unsigned char, BLOCK_SIZE> BlockT;
+    typedef typename ShM<BlockT, USE_SHARED_MEMORY>::vector   BlockContainerT;
+    typedef typename ShM<unsigned, USE_SHARED_MEMORY>::vector OffsetContainerT;
+    typedef decltype(boost::irange(0u,0u))                    RangeT;
+
+    friend std::ostream& operator<< <>(std::ostream &out, const RangeTable &table);
+    friend std::istream& operator>> <>(std::istream &in, RangeTable &table);
+
+    // creates an empty table, e.g. as the target of operator>>
+    RangeTable() : sum_lengths(0) {}
+
+    // for loading from shared memory; takes over the content of both containers via swap
+    explicit RangeTable(OffsetContainerT& external_offsets, BlockContainerT& external_blocks, const unsigned sum_lengths)
+    : sum_lengths(sum_lengths)
+    {
+        block_offsets.swap(external_offsets);
+        diff_blocks.swap(external_blocks);
+    }
+
+    // construct table from length vector; every length must be <= 255.
+    // One sentinel entry is appended after the last length, hence size() + 1 below.
+    explicit RangeTable(const std::vector<unsigned>& lengths)
+    {
+        // ceil((lengths.size() + 1) / (BLOCK_SIZE + 1))
+        const unsigned number_of_blocks = (lengths.size() + 1 + BLOCK_SIZE) / (BLOCK_SIZE + 1);
+
+        block_offsets.reserve(number_of_blocks);
+        diff_blocks.reserve(number_of_blocks);
+
+        unsigned last_length = 0;
+        unsigned lengths_prefix_sum = 0;
+        unsigned block_idx = 0;
+        unsigned block_counter = 0;
+        BlockT block;
+        unsigned block_sum = 0;
+        for (const unsigned l : lengths)
+        {
+            // first entry of a block: encode absolute offset
+            if (block_idx == 0)
+            {
+                block_offsets.push_back(lengths_prefix_sum);
+                block_sum = 0;
+            }
+            else
+            {
+                block[block_idx - 1] = last_length;
+                block_sum += last_length;
+            }
+
+            BOOST_ASSERT((block_idx == 0 && block_offsets[block_counter] == lengths_prefix_sum)
+                || lengths_prefix_sum == (block_offsets[block_counter]+block_sum));
+
+            // block is full
+            if (BLOCK_SIZE == block_idx)
+            {
+                diff_blocks.push_back(block);
+                block_counter++;
+            }
+
+            // a length is stored in a single byte, so it must not exceed 255
+            BOOST_ASSERT(l <= 255);
+
+            lengths_prefix_sum += l;
+            last_length = l;
+
+            block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
+        }
+
+        // Last block can't be finished because we didn't add the sentinel
+        BOOST_ASSERT (block_counter == (number_of_blocks - 1));
+
+        // one block missing: starts with guard value
+        if (0 == block_idx)
+        {
+            // the last value is used as sentinel
+            block_offsets.push_back(lengths_prefix_sum);
+            // The last real length is already implied by this offset, so it must not leak
+            // into the sentinel block. The period is BLOCK_SIZE + 1, as in the other loops.
+            last_length = 0;
+            block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
+        }
+
+        // store the pending length, then pad the rest of the block with zeros
+        while (0 != block_idx)
+        {
+            block[block_idx - 1] = last_length;
+            last_length = 0;
+            block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
+        }
+        diff_blocks.push_back(block);
+
+        BOOST_ASSERT(diff_blocks.size() == number_of_blocks && block_offsets.size() == number_of_blocks);
+
+        sum_lengths = lengths_prefix_sum;
+    }
+
+    // returns the half-open index range [begin, end) of entry id
+    inline RangeT GetRange(const unsigned id) const
+    {
+        BOOST_ASSERT(id < block_offsets.size() + diff_blocks.size() * BLOCK_SIZE);
+        // internal_idx 0 is implicitly stored in block_offsets[block_idx]
+        const unsigned internal_idx = id % (BLOCK_SIZE + 1);
+        const unsigned block_idx = id / (BLOCK_SIZE + 1);
+
+        BOOST_ASSERT(block_idx < diff_blocks.size());
+
+        unsigned begin_idx = block_offsets[block_idx];
+        unsigned end_idx = 0;
+        const BlockT& block = diff_blocks[block_idx];
+        if (internal_idx > 0)
+        {
+            begin_idx += PrefixSumAtIndex(internal_idx - 1, block);
+        }
+
+        // next index inside current block
+        if (internal_idx < BLOCK_SIZE)
+        {
+            // block[internal_idx] holds the length of the *current* entry
+            end_idx = begin_idx + block[internal_idx];
+        }
+        else
+        {
+            // last entry of the block: it ends where the next block begins
+            BOOST_ASSERT(block_idx < block_offsets.size() - 1);
+            end_idx = block_offsets[block_idx + 1];
+        }
+
+        // begin_idx == sum_lengths is legal: an empty range at the very end
+        BOOST_ASSERT(begin_idx <= end_idx && end_idx <= sum_lengths);
+        return boost::irange(begin_idx, end_idx);
+    }
+private:
+    // sum of block[0] .. block[index], both ends inclusive
+    inline unsigned PrefixSumAtIndex(int index, const BlockT& block) const;
+
+    // contains offset for each differential block
+    OffsetContainerT block_offsets;
+    // blocks of differential encoded offsets, should be aligned
+    BlockContainerT diff_blocks;
+    unsigned sum_lengths;
+};
+
+template<unsigned BLOCK_SIZE, bool USE_SHARED_MEMORY>
+unsigned RangeTable<BLOCK_SIZE, USE_SHARED_MEMORY>::PrefixSumAtIndex(int index, const BlockT& block) const
+{
+    // Inclusive sum of block[0] .. block[index]; a negative index gives 0. Kept as a plain
+    // counting loop: the original author checked that compilers vectorize it.
+    unsigned total = 0;
+    int pos = 0;
+    while (pos <= index)
+    {
+        total += block[pos++];
+    }
+    return total;
+}
+
+template<unsigned BLOCK_SIZE, bool USE_SHARED_MEMORY>
+std::ostream& operator<<(std::ostream &out, const RangeTable<BLOCK_SIZE, USE_SHARED_MEMORY> &table)
+{
+    // header: block count followed by the total length
+    const unsigned block_count = table.diff_blocks.size();
+    out.write(reinterpret_cast<const char *>(&block_count), sizeof(unsigned));
+    out.write(reinterpret_cast<const char *>(&table.sum_lengths), sizeof(unsigned));
+
+    // payload: every absolute block offset, then every differential block
+    const auto &offsets = table.block_offsets;
+    const auto &blocks = table.diff_blocks;
+    out.write(reinterpret_cast<const char *>(offsets.data()), sizeof(unsigned) * offsets.size());
+    out.write(reinterpret_cast<const char *>(blocks.data()), BLOCK_SIZE * blocks.size());
+    return out;
+}
+
+template<unsigned BLOCK_SIZE, bool USE_SHARED_MEMORY>
+std::istream& operator>>(std::istream &in, RangeTable<BLOCK_SIZE, USE_SHARED_MEMORY> &table)
+{
+    // header: number of blocks, then total length; both are zero-initialized so
+    // that a failed read cannot leave an indeterminate value behind
+    unsigned number_of_blocks = 0;
+    unsigned sum_lengths = 0;
+    in.read(reinterpret_cast<char *>(&number_of_blocks), sizeof(unsigned));
+    in.read(reinterpret_cast<char *>(&sum_lengths), sizeof(unsigned));
+    if (!in) { return in; } // truncated header: leave the table untouched
+    table.sum_lengths = sum_lengths;
+    table.block_offsets.resize(number_of_blocks);
+    table.diff_blocks.resize(number_of_blocks);
+    // payload: all absolute block offsets first, then all differential blocks
+    in.read(reinterpret_cast<char *>(table.block_offsets.data()), sizeof(unsigned) * number_of_blocks);
+    in.read(reinterpret_cast<char *>(table.diff_blocks.data()), BLOCK_SIZE * number_of_blocks);
+    return in;
+}
+
+#endif
diff --git a/DataStructures/StaticRTree.h b/DataStructures/StaticRTree.h
@@ -766,7 +766,9 @@ class StaticRTree
         }
         const uint64_t seek_pos = sizeof(uint64_t) + leaf_id * sizeof(LeafNode);
         thread_local_rtree_stream->seekg(seek_pos);
+        BOOST_ASSERT_MSG(thread_local_rtree_stream->good(), "Seeking to position in leaf file failed.");
         thread_local_rtree_stream->read((char *)&result_node, sizeof(LeafNode));
+        BOOST_ASSERT_MSG(thread_local_rtree_stream->good(), "Reading from leaf file failed.");
     }
 
     inline bool EdgesAreEquivalent(const FixedPointCoordinate &a,

diff --git a/Extractor/ExtractionContainers.cpp b/Extractor/ExtractionContainers.cpp
@@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "../Util/OSRMException.h"
 #include "../Util/SimpleLogger.h"
 #include "../Util/TimingUtil.h"
+#include "../DataStructures/RangeTable.h"
 
 #include <boost/assert.hpp>
 #include <boost/filesystem.hpp>
@@ -64,6 +65,7 @@ void ExtractionContainers::PrepareData(const std::string &output_file_name,
     {
         unsigned number_of_used_nodes = 0;
         unsigned number_of_used_edges = 0;
+
         std::cout << "[extractor] Sorting used nodes        ... " << std::flush;
         TIMER_START(sorting_used_nodes);
         stxxl::sort(used_node_id_list.begin(), used_node_id_list.end(), Cmp(), stxxl_memory);
@@ -395,32 +397,23 @@ void ExtractionContainers::PrepareData(const std::string &output_file_name,
         std::string name_file_streamName = (output_file_name + ".names");
         boost::filesystem::ofstream name_file_stream(name_file_streamName, std::ios::binary);
 
-        // write number of names
-        const unsigned number_of_names = name_list.size() + 1;
-        name_file_stream.write((char *)&(number_of_names), sizeof(unsigned));
-
-        // compute total number of chars
-        unsigned total_number_of_chars = 0;
-        for (const std::string &temp_string : name_list)
-        {
-            total_number_of_chars += temp_string.length();
-        }
-        // write total number of chars
-        name_file_stream.write((char *)&(total_number_of_chars), sizeof(unsigned));
-        // write prefixe sums
-        unsigned name_lengths_prefix_sum = 0;
+        unsigned total_length = 0;
+        std::vector<unsigned> name_lengths;
         for (const std::string &temp_string : name_list)
         {
-            name_file_stream.write((char *)&(name_lengths_prefix_sum), sizeof(unsigned));
-            name_lengths_prefix_sum += temp_string.length();
+            const unsigned string_length = std::min(static_cast<unsigned>(temp_string.length()), 255u);
+            name_lengths.push_back(string_length);
+            total_length += string_length;
         }
-        // duplicate on purpose!
-        name_file_stream.write((char *)&(name_lengths_prefix_sum), sizeof(unsigned));
 
+        RangeTable<> table(name_lengths);
+        name_file_stream << table;
+
+        name_file_stream.write((char*) &total_length, sizeof(unsigned));
         // write all chars consecutively
         for (const std::string &temp_string : name_list)
         {
-            const unsigned string_length = temp_string.length();
+            const unsigned string_length = std::min(static_cast<unsigned>(temp_string.length()), 255u);
             name_file_stream.write(temp_string.c_str(), string_length);
         }
 

diff --git a/Server/DataStructures/InternalDataFacade.h b/Server/DataStructures/InternalDataFacade.h
@@ -38,6 +38,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "../../DataStructures/SharedMemoryVectorWrapper.h"
 #include "../../DataStructures/StaticGraph.h"
 #include "../../DataStructures/StaticRTree.h"
+#include "../../DataStructures/RangeTable.h"
 #include "../../Util/BoostFileSystemFix.h"
 #include "../../Util/GraphLoader.h"
 #include "../../Util/ProgramOptions.h"
@@ -66,13 +67,13 @@ template <class EdgeDataT> class InternalDataFacade : public BaseDataFacade<Edge
     ShM<unsigned, false>::vector m_name_ID_list;
     ShM<TurnInstruction, false>::vector m_turn_instruction_list;
     ShM<char, false>::vector m_names_char_list;
-    ShM<unsigned, false>::vector m_name_begin_indices;
     ShM<bool, false>::vector m_egde_is_compressed;
     ShM<unsigned, false>::vector m_geometry_indices;
     ShM<unsigned, false>::vector m_geometry_list;
 
     std::shared_ptr<StaticRTree<RTreeLeaf, ShM<FixedPointCoordinate, false>::vector, false>>
     m_static_rtree;
+    RangeTable<16, false> m_name_table;
 
     void LoadTimestamp(const boost::filesystem::path &timestamp_path)
     {
@@ -203,16 +204,12 @@ template <class EdgeDataT> class InternalDataFacade : public BaseDataFacade<Edge
     void LoadStreetNames(const boost::filesystem::path &names_file)
     {
         boost::filesystem::ifstream name_stream(names_file, std::ios::binary);
-        unsigned number_of_names = 0;
+
+        name_stream >> m_name_table;
+
         unsigned number_of_chars = 0;
-        name_stream.read((char *)&number_of_names, sizeof(unsigned));
         name_stream.read((char *)&number_of_chars, sizeof(unsigned));
-        BOOST_ASSERT_MSG(0 != number_of_names, "name file broken");
         BOOST_ASSERT_MSG(0 != number_of_chars, "name file broken");
-
-        m_name_begin_indices.resize(number_of_names);
-        name_stream.read((char *)&m_name_begin_indices[0], number_of_names * sizeof(unsigned));
-
         m_names_char_list.resize(number_of_chars + 1); //+1 gives sentinel element
         name_stream.read((char *)&m_names_char_list[0], number_of_chars * sizeof(char));
         BOOST_ASSERT_MSG(0 != m_names_char_list.size(), "could not load any names");
@@ -384,18 +381,16 @@ template <class EdgeDataT> class InternalDataFacade : public BaseDataFacade<Edge
             result = "";
             return;
         }
-        BOOST_ASSERT_MSG(name_id < m_name_begin_indices.size(), "name id too high");
-        const unsigned begin_index = m_name_begin_indices[name_id];
-        const unsigned end_index = m_name_begin_indices[name_id + 1];
-        BOOST_ASSERT_MSG(begin_index < m_names_char_list.size(), "begin index of name too high");
-        BOOST_ASSERT_MSG(end_index < m_names_char_list.size(), "end index of name too high");
+        auto range = m_name_table.GetRange(name_id);
 
-        BOOST_ASSERT_MSG(begin_index <= end_index, "string ends before begin");
         result.clear();
-        result.resize(end_index - begin_index);
-        std::copy(m_names_char_list.begin() + begin_index,
-                  m_names_char_list.begin() + end_index,
-                  result.begin());
+        if (range.begin() != range.end())
+        {
+            result.resize(range.back() - range.front() + 1);
+            std::copy(m_names_char_list.begin() + range.front(),
+                      m_names_char_list.begin() + range.back() + 1,
+                      result.begin());
+        }
     }
 
     virtual unsigned GetGeometryIndexForEdgeID(const unsigned id) const