Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ArrayOfRagged #927

Merged
merged 8 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 89 additions & 15 deletions k2/csrc/array_of_ragged.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/**
* Copyright 2022 Xiaomi Corporation (authors: Wei Kang)
* Copyright 2022 Xiaomi Corporation (authors: Daniel Povey, Wei Kang)
* 2022 ASLP@NWPU (authors: Hang Lyu)

*
* See LICENSE for clarification regarding multiple authors
*
Expand All @@ -20,35 +22,107 @@

namespace k2 {

/*
  Builds the pointer tables and meta-information for an array of ragged
  shapes.

  Args:
    srcs:      CPU pointer to an array of `num_srcs` source shapes.  All of
               them must have the same NumAxes() and a context compatible
               with that of srcs[0].
    num_srcs:  the number of source shapes; must be > 0.

  Every table is first populated on CPU and then moved to the context of
  srcs[0] with To().
 */
Array1OfRaggedShape::Array1OfRaggedShape(RaggedShape *srcs, int32_t num_srcs)
    : num_srcs_(num_srcs) {
  K2_CHECK_GT(num_srcs, 0);
  K2_CHECK(srcs);

  // Initialize context and num_axes_ from the first source.
  c_ = srcs[0].Context();
  num_axes_ = srcs[0].NumAxes();

  // Check that the remaining sources have the same num-axes and a compatible
  // context.
  for (int32_t i = 1; i < num_srcs_; ++i) {
    K2_CHECK_EQ(num_axes_, srcs[i].NumAxes());
    K2_CHECK(c_->IsCompatible(*(srcs[i].Context())));
  }

  // Initialize row_splits_, row_ids_ and tot_sizes_.
  //
  // Notice: since the Data() function is a __host__ function, it cannot be
  // called on GPU. It limits us to work on CPU so that the row_splits_ and
  // row_ids_ are populated on CPU, although the operator() of Array2 is a
  // __host__ and __device__ function. Bear in mind, we cannot access the
  // GPU data on CPU.
  row_splits_ =
      Array2<const int32_t *>(GetCpuContext(), num_axes_ - 1, num_srcs_);
  row_ids_ = Array2<const int32_t *>(GetCpuContext(), num_axes_ - 1, num_srcs_);

  // Notice: no matter the return value of TotSize() is from 'cached_tot_size'
  // or the Back() function (i.e. operator[]) of array1, it is a CPU value.
  tot_sizes_ = Array1<int32_t>(GetCpuContext(), num_axes_, 0);

  auto row_splits_acc = row_splits_.Accessor(),
       row_ids_acc = row_ids_.Accessor();
  // Bear in mind, when axis == 0, the TotSize() is row_splits.Dim() - 1.
  // When 0 < axis < NumAxes(), the TotSize() is row_splits.Back().
  int32_t *tot_sizes_data = tot_sizes_.Data();

  for (int32_t i = 0; i < num_srcs_; ++i) {
    // The pointer tables are stored with an offset of one on the axis index:
    // row (j - 1) holds the pointers for axis j.
    for (int32_t j = 1; j < num_axes_; ++j) {
      row_splits_acc(j - 1, i) = srcs[i].RowSplits(j).Data();
      row_ids_acc(j - 1, i) = srcs[i].RowIds(j).Data();
      tot_sizes_data[j] += srcs[i].TotSize(j);
    }
    tot_sizes_data[0] += srcs[i].TotSize(0);
  }

  row_splits_ = row_splits_.To(c_);
  row_ids_ = row_ids_.To(c_);
  tot_sizes_ = tot_sizes_.To(c_);

  // Initialize meta_row_splits_ and offsets_.
  // We populate these on CPU and transfer them to c_ at the end.
  meta_row_splits_ = Array2<int32_t>(GetCpuContext(), num_axes_, num_srcs_ + 1);
  offsets_ = Array2<int32_t>(GetCpuContext(), num_axes_ + 1, num_srcs_ + 1);

  auto meta_row_splits_acc = meta_row_splits_.Accessor(),
       offsets_acc = offsets_.Accessor();

  // Initialize the 1st row of offsets_, which contains 0,1,2,...
  for (int32_t col = 0; col <= num_srcs_; ++col) {
    offsets_acc(0, col) = col;
  }
  // Initialize the 1st col of meta_row_splits_ and offsets_
  for (int32_t row = 0; row < num_axes_; ++row) {
    meta_row_splits_acc(row, 0) = 0;
    offsets_acc(row + 1, 0) = 0;
  }

  // The meta_row_splits_ is the cumulative sum of the tot-sizes of the
  // individual arrays; row (i + 1) of offsets_ mirrors row i of
  // meta_row_splits_.
  for (int32_t i = 0; i < num_axes_; ++i) {
    for (int32_t j = 1; j <= num_srcs_; ++j) {
      meta_row_splits_acc(i, j) =
          meta_row_splits_acc(i, j - 1) + srcs[j - 1].TotSize(i);
      offsets_acc(i + 1, j) = meta_row_splits_acc(i, j);
    }
  }

  // Initialize meta_row_ids_.
  // Elements are in [0, NumSrcs() - 1]
  meta_row_ids_.resize(num_axes_);

  for (int32_t axis = 0; axis < num_axes_; ++axis) {
    // The length equals to TotSize(axis), i.e. the last meta-row-split.
    meta_row_ids_.at(axis) = Array1<int32_t>(
        GetCpuContext(), meta_row_splits_acc(axis, num_srcs_));
    int32_t *meta_row_ids_data = meta_row_ids_[axis].Data();

    // Fill [meta_row_splits(axis, src), meta_row_splits(axis, src + 1)) with
    // `src`; cur_row_start carries over from one source to the next.
    int32_t cur_row_start = meta_row_splits_acc(axis, 0);
    for (int32_t src = 0; src < num_srcs_; ++src) {
      int32_t next_row_start = meta_row_splits_acc(axis, src + 1);
      for (; cur_row_start < next_row_start; ++cur_row_start) {
        meta_row_ids_data[cur_row_start] = src;
      }
    }
    meta_row_ids_[axis] = meta_row_ids_[axis].To(c_);
  }

  meta_row_splits_ = meta_row_splits_.To(c_);
  offsets_ = offsets_.To(c_);
}


} // namespace k2
119 changes: 83 additions & 36 deletions k2/csrc/array_of_ragged.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright 2022 Xiaomi Corporation (authors: Daniel Povey, Wei Kang)
* 2022 ASLP@NWPU (authors: Hang Lyu)
*
* See LICENSE for clarification regarding multiple authors
*
Expand All @@ -24,31 +25,48 @@
#include <vector>

#include "k2/csrc/array.h"
#include "k2/csrc/array_ops.h"
#include "k2/csrc/context.h"
#include "k2/csrc/log.h"
#include "k2/csrc/ragged_ops.h"
#include "k2/csrc/ragged.h"

namespace k2 {

/*
Array1OfRagged<T> is a 1-dimensional array of Ragged<T>.
It is intended for situations where you want to do some operations on
arrays of ragged arrays, without explicitly concatenating them (e.g. to
save time). This is a fairly low-level interface, intended to
be used mostly by CUDA/C++ implementation code. It is a convenience
wrapper that saves you the trouble of creating arrays of pointers.
*/


/*
Array1OfRaggedShape is a convenience class that gives you easy access
to pointers-of-pointers for an array of ragged shapes.
*/
class Array1OfRaggedShape {
public:
// Default constructor.
Array1OfRaggedShape() = default;

/*
  Constructor.
  Args:
    srcs: pointers to the source shapes, a CPU pointer
    num_srcs: the number of source shapes. All shapes must have the
              same NumAxes() and must be on the same device.

  TODO: we'll likely, later, add optional args which dictate which of
  the MetaRowSplits() and MetaRowIds() are to be pre-populated; this should
  enable us to save kernels by combining certain operations across the
  axes.
*/
Array1OfRaggedShape(RaggedShape *srcs, int32_t num_srcs);


// Number of source shapes given to the constructor.
int32_t NumSrcs() const {
  return num_srcs_;
}
// Number of axes, taken from the first source shape in the constructor.
int32_t NumAxes() const {
  return num_axes_;
}
Expand All @@ -63,23 +81,35 @@ class Array1OfRaggedShape {
// Returns device-accessible vector of row-splits for a particular
// axis, indexed by 0 <= src < num_srcs.
const int32_t **RowSplits(int32_t axis) {
return row_splits_.Row(axis - 1).Data();
K2_CHECK_LT(static_cast<uint32_t>(axis),
static_cast<uint32_t>(num_axes_));
return row_splits_.Row(axis - 1).Data();
}

// Returns device-accessible array of row-ids for the individual shapes
// indexed [axis-1][src], with 0 <= src < num_srcs.  The shape of this
// Array2 is [NumAxes() - 1][NumSrcs()].
// The returned pointer refers to a member of this object.
const Array2<const int32_t *> *RowIds() const { return &row_ids_; }


// Returns device-accessible vector of row-splits for a particular
// axis, indexed by 0 <= src < num_srcs.
const int32_t **RowIds(int32_t axis) { return row_ids_.Row(axis - 1).Data(); }
const int32_t **RowIds(int32_t axis) {
K2_CHECK_LT(static_cast<uint32_t>(axis),
static_cast<uint32_t>(num_axes_));
return row_ids_.Row(axis - 1).Data();
}


/* Return the total size on this axis, which is the sum of the TotSize() of
   the individual shapes.  Requires 0 <= axis < NumAxes() and
   for axis=0 the returned value is the same as Dim0().
*/
int32_t TotSize(int32_t axis) const {
  // The unsigned comparison rejects negative axes as well as axes that are
  // too large.
  K2_CHECK_LT(static_cast<uint32_t>(axis),
              static_cast<uint32_t>(num_axes_));
  return tot_sizes_[axis];
}

// Total size on axis 0; shorthand for TotSize(0).
int32_t Dim0() const {
  const int32_t first_axis = 0;
  return TotSize(first_axis);
}
Expand All @@ -88,7 +118,7 @@ class Array1OfRaggedShape {
along the src axis, of the tot-sizes of the individual arrays.
This Array2 is of shape [NumAxes()][NumSrcs() + 1], indexed [axis][src];
caution, the indexing is different from RowSplits(), there is no offset.
Also, meta-row-splits exist for axis 0, unlike regular row-splits, whose
axes start from 1.

Caution: the lengths of the arrays pointed to by the elements of this
Expand All @@ -99,38 +129,47 @@ class Array1OfRaggedShape {
to GPU, this will be faster than invoking an extra kernel in normal cases
when the NumSrcs() is small. [Also: see GetRowInfoMulti()].
*/
// Returns the meta_row_splits_ table built by the constructor, by const
// reference (no copy is made).
const Array2<int32_t> &MetaRowSplits() const { return meta_row_splits_; }

// Returns the offsets table, of shape [NumAxes() + 1][NumSrcs() + 1]: it is
// like MetaRowSplits() but with an extra 1st row containing 0,1,2,..., i.e.
// row (axis + 1) holds the same values as row `axis` of MetaRowSplits().
// The extra row is there so this can be used in functions like Stack().
const Array2<int32_t> &Offsets() const { return offsets_; }

/*
  Returns the meta-row-splits for a particular axis, with
  0 <= axis < NumAxes();
  this is the cumulative sum of the TotSize(axis) for all of the sources,
  with MetaRowSplits(axis).Dim() == NumSrcs() + 1.

  Note: in ragged_ops.cu we refer to this as composed_row_splits
*/
Array1<int32_t> MetaRowSplits(int32_t axis) {
  // The unsigned comparison rejects negative axes as well as axes that are
  // too large.
  K2_CHECK_LT(static_cast<uint32_t>(axis),
              static_cast<uint32_t>(num_axes_));
  return meta_row_splits_.Row(axis);
}

/* Return the device-accessible meta-row-ids, which are the row-ids
   corresponding to MetaRowSplits(); this tells us, for indexes into the
   appended/concatenated array, which source array they belong to,
   i.e. elements are in [0,NumSrcs()-1].

   This cannot be an Array2 because unlike the MetaRowSplits(), all the
   row-ids arrays are of different lengths.

   The returned array has one data pointer per axis (dim NumAxes()).  It is
   rebuilt on every call: filled on CPU, then moved to this object's context
   with To().

   Note: in ragged_ops.cu we refer to this as composed_row_ids.
*/
Array1<const int32_t *> MetaRowIds() {
  Array1<const int32_t *> ans(GetCpuContext(), num_axes_);
  const int32_t **ans_data = ans.Data();
  for (int32_t i = 0; i < num_axes_; ++i) {
    ans_data[i] = meta_row_ids_[i].Data();
  }
  ans = ans.To(c_);
  return ans;
}

/*
Returns the meta-row-ids for a particular axis, with 0 <= axis < NumAxes();
Expand All @@ -140,18 +179,28 @@ class Array1OfRaggedShape {
would tell us which source an idx012 with value 100 into axis 2 of
concatenated array would come from.
*/
// Returns a const reference to the stored per-axis array (no copy is made).
const Array1<int32_t> &MetaRowIds(int32_t axis) const {
  // The unsigned comparison rejects negative axes as well as axes that are
  // too large.
  K2_CHECK_LT(static_cast<uint32_t>(axis),
              static_cast<uint32_t>(num_axes_));
  return meta_row_ids_[axis];
}

private:
ContextPtr c_;  // context of the first source; the tables below are moved to it
// Zero-initialized so that a default-constructed object has well-defined
// NumSrcs()/NumAxes() and fails the axis range checks deterministically.
int32_t num_srcs_ = 0;
int32_t num_axes_ = 0;

Array2<const int32_t *> row_splits_;  // shape [num_axes_ - 1][num_srcs_]
Array2<const int32_t *> row_ids_;     // shape [num_axes_ - 1][num_srcs_]
Array1<int32_t> tot_sizes_;           // dim num_axes_

Array2<int32_t> meta_row_splits_;  // shape [num_axes_][num_srcs_ + 1]
Array2<int32_t> offsets_;          // shape [num_axes_ + 1][num_srcs_ + 1]
std::vector<Array1<int32_t> > meta_row_ids_;  // dim num_axes_
};



/*
Array1OfRagged<T> is a 1-dimensional array of Ragged<T>.
It is intended for situations where you want to do some operations on
Expand All @@ -171,17 +220,14 @@ struct Array1OfRagged {
// Number of sources, read from the dimension of `values`.
int32_t NumSrcs() const {
  return values.Dim();
}
// Forwards to the context of `shape`.
ContextPtr &Context() {
  return shape.Context();
}

// Default constructor will not leave this a valid Array1OfRagged object,
// you shouldn't do anything with it. Both members will be initialized with
// default constructors.
Array1OfRagged() = default;

/*
Constructor.
Args:
srcs: pointers to the source ragged tensors, a CPU pointer
num_srcs: the number of source ragged tensors. All ragged tensors must
have the same NumAxes() and must be on the same device.
*/
// The 'srcs' should have the same number of axes.
Array1OfRagged(Ragged<T> *srcs, int32_t num_srcs) {
K2_CHECK_GE(num_srcs, 1);
K2_CHECK_GT(num_srcs, 0);
K2_CHECK(srcs);
values = Array1<T *>(GetCpuContext(), num_srcs);
T **values_data = values.Data();
Expand All @@ -195,6 +241,7 @@ struct Array1OfRagged {
}
};


} // namespace k2

#endif // K2_CSRC_ARRAY_OF_RAGGED_H_
Loading