-
Notifications
You must be signed in to change notification settings - Fork 222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add segsort from moderngpu #181
Changes from all commits
e0d17a0
a2f82b9
533406e
9cb83a5
c09a3ed
a8120f5
ce9eddd
01d1091
89c8595
8f2f8e0
6c14c1c
5a9fc1c
0eaf4df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Copyright (c) 2020 Mobvoi AI Lab, Beijing, China (authors: Fangjun Kuang) | ||
# See ../LICENSE for clarification regarding multiple authors | ||
|
||
# Fetches a pinned snapshot of moderngpu with FetchContent and exposes it as
# the INTERFACE target `moderngpu`.
#
# Targets that link against `moderngpu` get `<moderngpu source dir>/src` on
# their include path and inherit the compiler options listed at the bottom of
# this function.
function(download_moderngpu)
  # The FetchContent module is only shipped with CMake >= 3.11. For older
  # versions, look for it in cmake/Modules of this project as well.
  if(CMAKE_VERSION VERSION_LESS 3.11)
    list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
  endif()

  include(FetchContent)

  # This is the latest commit of moderngpu as of 2020-09-26.
  set(moderngpu_URL "https://github.com/moderngpu/moderngpu/archive/2b3985541c8e88a133769598c406c33ddde9d0a5.zip")
  set(moderngpu_HASH "SHA256=191546af18cd5fb858ecb561316f3af67537ab16f610fc8f1a5febbffc27755a")

  FetchContent_Declare(moderngpu
    URL ${moderngpu_URL}
    URL_HASH ${moderngpu_HASH}
  )

  # FetchContent_GetProperties() reports the population state through
  # `moderngpu_POPULATED` (it never defines a variable named `moderngpu`), so
  # that is the variable that has to be tested before populating.
  FetchContent_GetProperties(moderngpu)
  if(NOT moderngpu_POPULATED)
    message(STATUS "Downloading moderngpu")
    FetchContent_Populate(moderngpu)
  endif()
  message(STATUS "moderngpu is downloaded to ${moderngpu_SOURCE_DIR}")

  # Only create the target once; add_library() fails if the name is taken.
  if(NOT TARGET moderngpu)
    add_library(moderngpu INTERFACE)
    target_include_directories(moderngpu INTERFACE ${moderngpu_SOURCE_DIR}/src)
    target_compile_options(moderngpu INTERFACE -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w)
  endif()
endfunction()
|
||
download_moderngpu() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/** | ||
* @brief A better memory allocator for moderngpu. | ||
* | ||
* | ||
* @copyright | ||
* Copyright (c) 2020 Mobvoi AI Lab, Beijing, China (authors: Fangjun Kuang) | ||
* | ||
* @copyright | ||
* See LICENSE for clarification regarding multiple authors | ||
*/ | ||
|
||
#include <utility> | ||
|
||
#include "k2/csrc/context.h" | ||
#include "k2/csrc/moderngpu_allocator.h" | ||
#include "moderngpu/context.hxx" | ||
|
||
namespace { | ||
|
||
class ModernGpuAllocator : public mgpu::standard_context_t { | ||
public: | ||
explicit ModernGpuAllocator(k2::ContextPtr context) | ||
: mgpu::standard_context_t(false, context->GetCudaStream()), | ||
context_(std::move(context)) {} | ||
|
||
void *alloc(size_t size, mgpu::memory_space_t space) override { | ||
K2_DCHECK_EQ(space, mgpu::memory_space_device); | ||
void *deleter_ = nullptr; | ||
void *p = context_->Allocate(size, &deleter_); | ||
K2_DCHECK(deleter_ == nullptr); | ||
return p; | ||
} | ||
|
||
void free(void *p, mgpu::memory_space_t space) override { | ||
K2_DCHECK_EQ(space, mgpu::memory_space_device); | ||
context_->Deallocate(p, nullptr); | ||
} | ||
|
||
private: | ||
k2::ContextPtr context_; | ||
}; | ||
|
||
} // namespace | ||
|
||
namespace k2 { | ||
|
||
std::unique_ptr<mgpu::context_t> GetModernGpuAllocator( | ||
int32_t device_id /*= -1*/) { | ||
return std::make_unique<ModernGpuAllocator>(GetCudaContext(device_id)); | ||
} | ||
|
||
} // namespace k2 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/** | ||
* @brief This is an allocator for moderngpu only. | ||
* | ||
* Currently it is used by `SortSublists`. | ||
* | ||
* @copyright | ||
* Copyright (c) 2020 Mobvoi AI Lab, Beijing, China (authors: Fangjun Kuang) | ||
* | ||
* @copyright | ||
* See LICENSE for clarification regarding multiple authors | ||
*/ | ||
|
||
#ifndef K2_CSRC_MODERNGPU_ALLOCATOR_H_ | ||
#define K2_CSRC_MODERNGPU_ALLOCATOR_H_ | ||
|
||
#include <memory> | ||
|
||
#include "moderngpu/context.hxx" | ||
|
||
namespace k2 { | ||
// Return a context for moderngpu that has a better memory allocator | ||
// than mgpu::standard_context_t. | ||
// | ||
// @param [in] device_id  Id of the CUDA device the context is created for. | ||
//                        NOTE(review): the default of -1 presumably selects | ||
//                        the current device -- confirm with the definition. | ||
// @return  An owning pointer to the newly created moderngpu context. | ||
std::unique_ptr<mgpu::context_t> GetModernGpuAllocator(int32_t device_id = -1); | ||
|
||
} // namespace k2 | ||
|
||
#endif // K2_CSRC_MODERNGPU_ALLOCATOR_H_ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
* | ||
* @copyright | ||
* Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey) | ||
* Mobvoi AI Lab, Beijing, China (authors: Fangjun Kuang) | ||
* | ||
* @copyright | ||
* See LICENSE for clarification regarding multiple authors | ||
|
@@ -15,8 +16,12 @@ | |
#ifndef K2_CSRC_RAGGED_INL_H_ | ||
#define K2_CSRC_RAGGED_INL_H_ | ||
|
||
#include <memory> | ||
#include <vector> | ||
|
||
#include "k2/csrc/moderngpu_allocator.h" | ||
#include "moderngpu/kernel_segsort.hxx" | ||
|
||
namespace k2 { | ||
|
||
template <typename T> | ||
|
@@ -95,6 +100,35 @@ Ragged<T> RandomRagged(T min_value, T max_value, int32_t min_num_axes, | |
return Ragged<T>(shape, values); | ||
} | ||
|
||
template <typename T, typename Op /* = LessThan<T> */> | ||
void SortSublists(Ragged<T> *src, Array1<int32_t> *order) { | ||
K2_DCHECK(IsCompatible(src->values, *order)); | ||
K2_DCHECK_EQ(src->values.Dim(), order->Dim()); | ||
K2_DCHECK_EQ(src->Context()->GetDeviceType(), kCuda) | ||
<< "It supports only CUDA at present"; | ||
|
||
std::unique_ptr<mgpu::context_t> context = | ||
GetModernGpuAllocator(src->Context()->GetDeviceId()); | ||
|
||
Array1<int32_t> &segment = src->shape.RowSplits(src->NumAxes() - 1); | ||
mgpu::segmented_sort_indices(src->values.Data(), // keys | ||
order->Data(), // indices | ||
src->values.Dim(), // count | ||
segment.Data() + 1, // segments | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also tried According to https://github.com/moderngpu/moderngpu/blob/2b3985541c8e88a133769598c406c33ddde9d0a5/tests/test_segsort.cu#L9-L15 std::vector<int> cpu_segsort(const std::vector<int>& data,
const std::vector<int>& segments) {
std::vector<int> copy = data;
int cur = 0;
for(int seg = 0; seg < segments.size(); ++seg) {
int next = segments[seg];
std::sort(copy.data() + cur, copy.data() + next);
cur = next;
}
std::sort(copy.data() + cur, copy.data() + data.size());
return copy;
} The start index of the first segment is 0. That's why I use |
||
segment.Dim() - 1, // num_segments | ||
Op(), // cmp | ||
*context); // context | ||
auto err = cudaGetLastError(); | ||
(void)err; | ||
// TODO(fangjun): err is not cudaSuccess, but why was the data sorted | ||
// correctly? | ||
// | ||
// Check failed: err == cudaSuccess (9 vs. 0) Error: invalid configuration | ||
// argument. | ||
// | ||
// K2_DCHECK_CUDA_ERROR(err); | ||
} | ||
|
||
} // namespace k2 | ||
|
||
#endif // K2_CSRC_RAGGED_INL_H_ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's better to add
`K2_CHECK_EQ(src->shape.NumAxes(), 2)`
at entry, as we just sort the values in `src`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It supports ragged arrays with more than two axes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, at least check
`NumAxes() >= 2`
here; otherwise there will be an error when calling `shape.RowSplits(NumAxes() - 1)`.
Note that you should also add such requirements to the documentation in k2/k2/csrc/ragged.h
(lines 577 to 584 in a178524).
BTW, in other APIs, such as
`MaxPerSublist`,
we currently require `NumAxes() == 2`.
@danpovey I think we should make those APIs consistent: should we require exactly `NumAxes() == 2`
or `NumAxes() >= 2`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In general I'd like to make the APIs as general as they can be made without requiring extra work.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It has no requirement about the number of axes a ragged array has. It always sorts the last axis.
This function works as long as the ragged array is not empty.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, it definitely requires `NumAxes() >= 2`. It's meaningless to call `RowSplits(1)` on a shape with
`NumAxes() < 2`;
in fact, it will crash. So I suggest we add documentation and requirements for those APIs that need
`NumAxes() >= 2`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you give an example to show what a ragged array with
`NumAxes() < 2`
looks like?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The point here is that, technically, we now allow users to define an empty RaggedShape; see
k2/k2/csrc/ragged.h,
line 137 in a178524.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll add a
`K2_DCHECK`
in the next commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@qindazhu
fixed in #218
k2/k2/csrc/ragged_inl.h
Line 111 in d291daf