
Commit ded6fb0

ssnl authored and facebook-github-bot committed

Add stack & cat support for CPU Half (pytorch#16389)

Summary: Fixes pytorch#6968. Needed for pytorch#14705.
Pull Request resolved: pytorch#16389
Differential Revision: D13861446
Pulled By: gchanan
fbshipit-source-id: 7b8700b95aaf252d9669693dbddccb2302e58409

1 parent d79e45b commit ded6fb0
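
For context, the user-visible effect of this change: torch.cat and torch.stack now work on CPU half-precision tensors (stack lowers to cat of unsqueezed inputs, so enabling the cat kernel covers both). A minimal illustrative check, assuming a build that includes this commit:

    import torch

    a = torch.randn(2, 3).half()   # CPU float16 tensor
    b = torch.randn(2, 3).half()

    # Both of these raised an error on CPU Half before this commit.
    catted = torch.cat([a, b], dim=0)      # shape [4, 3], dtype torch.float16
    stacked = torch.stack([a, b], dim=0)   # shape [2, 2, 3], dtype torch.float16
    print(catted.shape, stacked.shape)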

File tree

7 files changed: +201 -187 lines changed


aten/src/ATen/Declarations.cwrap (+1)

@@ -2872,6 +2872,7 @@
   name: _th_cat
   cname: catArray
   variants: [function]
+  cpu_half: True
   return: self
   arguments:
     - arg: THTensor* self

aten/src/TH/generic/THTensor.cpp (+125)

@@ -668,6 +668,131 @@ scalar_t THTensor_(get4d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3)
   return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2)+x3*tensor->stride(3));
 }

+
+/* Shape manipulation methods */
+void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension)
+{
+  THTensor* inputs[2];
+  inputs[0] = ta;
+  inputs[1] = tb;
+  THTensor_(catArray)(r_, inputs, 2, dimension);
+}
+
+void THTensor_(check_shape_except_dim)(THTensor *first, THTensor *second, int dimension);
+inline void THTensor_(check_shape_except_dim)(THTensor *first, THTensor *second, int dimension)
+{
+  int first_dims = first->dim();
+  int second_dims = second->dim();
+  THArgCheck(first_dims == second_dims, 0,
+      "Tensors must have same number of dimensions: got %d and %d",
+      first_dims, second_dims);
+  for (int dim = 0; dim < first_dims; dim++) {
+    if (dim == dimension) {
+      continue;
+    }
+    int64_t first_dim_size = first->size(dim);
+    int64_t second_dim_size = second->size(dim);
+    THArgCheck(first_dim_size == second_dim_size, 0,
+        "Sizes of tensors must match except in dimension %d. Got %lld and %lld in dimension %d",
+        dimension, (long long)first_dim_size, (long long)second_dim_size, dim);
+  }
+}
+
+void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension)
+{
+  // previously, size [0] tensors were the only possible empty tensors; thus, it wasn't possible
+  // to cat empty tensors unless all the other tensors were 1-dimensional, so we allowed these tensors
+  // to be "skipped". We maintain this behavior for backwards compatibility, but only for this specific
+  // size (i.e. other empty sizes are not skipped).
+  // FIXME: warn if this is the case
+  bool allSkipped = true;
+  int64_t nDims = 0;
+  THTensor *notSkippedTensor;  // non-owning reference
+  auto should_skip = [](THTensor *t) { return t->is_empty() && t->dim() == 1; };
+  for (int i = 0; i < numInputs; i++) {
+    if (should_skip(inputs[i])) {
+      continue;
+    }
+    // We've found a non-empty tensor
+    allSkipped = false;
+    notSkippedTensor = inputs[i];
+    nDims = notSkippedTensor->dim();
+    break;
+  }
+  if (allSkipped) {
+    return;
+  }
+
+  // Compute cat_dimension based on the non-empty tensor
+  THArgCheck(dimension < nDims, 4, "invalid dimension %d", dimension);
+  THArgCheck(numInputs > 0, 3, "invalid number of inputs %d", numInputs);
+
+  // Compute size of the result in the cat dimension
+  int64_t cat_dim_size = 0;
+  for (int i = 0; i < numInputs; i++) {
+    THTensor *tensor = inputs[i];
+    if (should_skip(tensor)) {
+      continue;
+    }
+    THTensor_(check_shape_except_dim)(notSkippedTensor, tensor, dimension);
+    cat_dim_size += tensor->size(dimension);
+  }
+
+  // Compute the size of the result
+  std::vector<int64_t> size(nDims);
+  for (int dim = 0; dim < nDims; dim++) {
+    int64_t result_dim_size = notSkippedTensor->size(dim);
+    if (dim == dimension) {
+      result_dim_size = cat_dim_size;
+    }
+    size[dim] = result_dim_size;
+  }
+  THTensor_(resize)(result, size, {});
+
+  // Check contiguity of all inputs and result
+  bool allContiguous = true;
+  for (int i = 0; i < numInputs; i++) {
+    if (!should_skip(inputs[i])) {
+      allContiguous = allContiguous && THTensor_(isContiguous)(inputs[i]);
+    }
+  }
+  allContiguous = allContiguous && THTensor_(isContiguous)(result);
+
+  // First path is for contiguous inputs along dim 0;
+  // second path is for non-contiguous inputs.
+  int64_t offset;
+  if (dimension == 0 && allContiguous) {
+    scalar_t* result_data = THStorage_(data)(THTensor_getStoragePtr(result)) + result->storage_offset();
+    offset = 0;
+    for (int j = 0; j < numInputs; j++) {
+      if (!should_skip(inputs[j])) {
+        THTensor* input0 = inputs[j];
+        scalar_t* input0_data = THStorage_(data)(THTensor_getStoragePtr(input0)) + input0->storage_offset();
+        int64_t input0_size = THTensor_(nElement)(input0);
+        // C standard says you can't pass nullptrs to memcpy, even if the size is 0; ubsan checks this.
+        if (input0_size != 0) {
+          memcpy(result_data + offset, input0_data, input0_size*sizeof(scalar_t));
+        }
+        offset += input0_size;
+      }
+    }
+  } else {
+    offset = 0;
+    for (int j = 0; j < numInputs; j++) {
+      if (!should_skip(inputs[j])) {
+        int64_t dimSize = inputs[j]->size(dimension);
+        THTensor *nt = THTensor_(newWithTensor)(result);
+        THTensor_(narrow)(nt, NULL, dimension, offset, dimSize);
+        at::Tensor nt__wrap = THTensor_wrap(nt);
+        at::Tensor inputs_wrap = THTensor_wrap(inputs[j]);
+        at::_copy_same_type_(nt__wrap, inputs_wrap);
+        c10::raw::intrusive_ptr::decref(nt);
+        offset += dimSize;
+      }
+    }
+  }
+}
+
 THDescBuff THTensor_(desc)(const THTensor *tensor) {
   const int L = TH_DESC_BUFF_LEN;
   THDescBuff buf;
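
To make the control flow above easier to follow, here is a rough Python sketch of what catArray does: the legacy skip rule for 1-D empty tensors, the shape check, the result sizing, and the per-input copy. The name cat_array_sketch is hypothetical, and the real C code additionally takes a memcpy fast path when dimension == 0 and all tensors involved are contiguous:

    import torch

    def cat_array_sketch(inputs, dim):
        # Legacy rule: 1-D empty tensors (the old size-[0] "empty") are skipped
        # for backwards compatibility.
        def should_skip(t):
            return t.dim() == 1 and t.numel() == 0

        kept = [t for t in inputs if not should_skip(t)]
        if not kept:
            return None  # all inputs skipped; the C code leaves `result` untouched

        ref = kept[0]  # first non-skipped tensor, like notSkippedTensor
        assert dim < ref.dim(), "invalid dimension"
        # Shapes must match in every dimension except `dim` (check_shape_except_dim).
        for t in kept:
            assert t.dim() == ref.dim(), "tensors must have same number of dimensions"
            for d in range(ref.dim()):
                assert d == dim or t.size(d) == ref.size(d), "size mismatch"

        # Result shape: ref's shape, with the cat dimension summed over inputs.
        size = list(ref.size())
        size[dim] = sum(t.size(dim) for t in kept)
        result = torch.empty(size, dtype=ref.dtype)

        # Generic path: copy each input into the matching narrow of the result.
        offset = 0
        for t in kept:
            result.narrow(dim, offset, t.size(dim)).copy_(t)
            offset += t.size(dim)
        return result

    # Agrees with torch.cat on this example:
    print(cat_array_sketch([torch.ones(2, 2), torch.zeros(1, 2)], 0))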

aten/src/TH/generic/THTensor.h (+4)

@@ -125,6 +125,10 @@ TH_API scalar_t THTensor_(get2d)(const THTensor *tensor, int64_t x0, int64_t x1);
 TH_API scalar_t THTensor_(get3d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2);
 TH_API scalar_t THTensor_(get4d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3);

+/* Shape manipulation methods */
+TH_API void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension);
+TH_API void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension);
+
 /* Debug methods */
 TH_API THDescBuff THTensor_(desc)(const THTensor *tensor);
 TH_API THDescBuff THTensor_(sizeDesc)(const THTensor *tensor);

aten/src/TH/generic/THTensorMath.h (-2)

@@ -103,8 +103,6 @@ TH_API void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, int64_t n)
 TH_API void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder);
 TH_API void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int64_t k, int dim, int dir, int sorted);
 TH_API void THTensor_(triu)(THTensor *r_, THTensor *t, int64_t k);
-TH_API void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension);
-TH_API void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension);

 TH_API int THTensor_(equal)(THTensor *ta, THTensor *tb);
aten/src/TH/generic/THTensorMoreMath.cpp

-123
Original file line numberDiff line numberDiff line change
@@ -1238,129 +1238,6 @@ void THTensor_(triu)(THTensor *r_, THTensor *t, int64_t k)
12381238
}
12391239
}
12401240

1241-
void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension)
1242-
{
1243-
THTensor* inputs[2];
1244-
inputs[0] = ta;
1245-
inputs[1] = tb;
1246-
THTensor_(catArray)(r_, inputs, 2, dimension);
1247-
}
1248-
1249-
void THTensor_(check_shape_except_dim)(THTensor *first, THTensor *second, int dimension);
1250-
inline void THTensor_(check_shape_except_dim)(THTensor *first, THTensor *second, int dimension)
1251-
{
1252-
int first_dims = first->dim();
1253-
int second_dims = second->dim();
1254-
THArgCheck(first_dims == second_dims, 0,
1255-
"Tensors must have same number of dimensions: got %d and %d",
1256-
first_dims, second_dims);
1257-
for (int dim = 0; dim < first_dims; dim++) {
1258-
if (dim == dimension) {
1259-
continue;
1260-
}
1261-
int64_t first_dim_size = first->size(dim);
1262-
int64_t second_dim_size = second->size(dim);
1263-
THArgCheck(first_dim_size == second_dim_size, 0,
1264-
"Sizes of tensors must match except in dimension %d. Got %lld and %lld in dimension %d",
1265-
dimension, (long long)first_dim_size, (long long)second_dim_size, dim);
1266-
}
1267-
}
1268-
1269-
void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension)
1270-
{
1271-
// previously, size [0] tensors were the only possible empty tensors; thus, it wasn't possible
1272-
// to cat empty tensors unless all the other tensors were 1-dimensional, so we allowed these tensors
1273-
// to be "skipped". We maintain this behavior for backwards compatibility, but only for this specific
1274-
// size (i.e. other empty sizes are not skipped).
1275-
// FIXME: warn if this is the case
1276-
bool allSkipped= true;
1277-
int64_t nDims = 0;
1278-
THTensor *notSkippedTensor; // non-owning reference
1279-
auto should_skip = [](THTensor *t) { return t->is_empty() && t->dim() == 1; };
1280-
for (int i = 0; i < numInputs; i++) {
1281-
if (should_skip(inputs[i])) {
1282-
continue;
1283-
}
1284-
// We've found a non-empty tensor
1285-
allSkipped = false;
1286-
notSkippedTensor = inputs[i];
1287-
nDims = notSkippedTensor->dim();
1288-
break;
1289-
}
1290-
if (allSkipped) {
1291-
return;
1292-
}
1293-
1294-
// Compute cat_dimension based on the non-empty tensor
1295-
THArgCheck(dimension < nDims, 4, "invalid dimension %d", dimension);
1296-
THArgCheck(numInputs > 0, 3, "invalid number of inputs %d", numInputs);
1297-
1298-
// Compute size of the result in the cat dimension
1299-
int64_t cat_dim_size = 0;
1300-
for (int i = 0; i < numInputs; i++) {
1301-
THTensor *tensor = inputs[i];
1302-
if (should_skip(tensor)) {
1303-
continue;
1304-
}
1305-
THTensor_(check_shape_except_dim)(notSkippedTensor, tensor, dimension);
1306-
cat_dim_size += tensor->size(dimension);
1307-
}
1308-
1309-
// Compute the size of the result
1310-
std::vector<int64_t> size(nDims);
1311-
for (int dim = 0; dim < nDims; dim++) {
1312-
int64_t result_dim_size = notSkippedTensor->size(dim);
1313-
if (dim == dimension) {
1314-
result_dim_size = cat_dim_size;
1315-
}
1316-
size[dim] = result_dim_size;
1317-
}
1318-
THTensor_(resize)(result, size, {});
1319-
1320-
// Check contiguity of all inputs and result
1321-
bool allContiguous = true;
1322-
for (int i = 0; i < numInputs; i++) {
1323-
if(!should_skip(inputs[i])) {
1324-
allContiguous = allContiguous && THTensor_(isContiguous)(inputs[i]);
1325-
}
1326-
}
1327-
allContiguous = allContiguous && THTensor_(isContiguous)(result);
1328-
1329-
// First path is for contiguous inputs along dim 0
1330-
// Second path for non-contiguous
1331-
int64_t offset;
1332-
if (dimension == 0 && allContiguous) {
1333-
scalar_t* result_data = THStorage_(data)(THTensor_getStoragePtr(result)) + result->storage_offset();
1334-
offset = 0;
1335-
for (int j = 0; j < numInputs; j++) {
1336-
if (!should_skip(inputs[j])) {
1337-
THTensor* input0 = inputs[j];
1338-
scalar_t* input0_data = THStorage_(data)(THTensor_getStoragePtr(input0)) + input0->storage_offset();
1339-
int64_t input0_size = THTensor_(nElement)(input0);
1340-
// C standard says you can't pass nullptrs to memcpy, even if the size is 0; ubsan checks this.
1341-
if (input0_size != 0) {
1342-
memcpy(result_data + offset, input0_data, input0_size*sizeof(scalar_t));
1343-
}
1344-
offset += input0_size;
1345-
}
1346-
}
1347-
} else {
1348-
offset = 0;
1349-
for (int j = 0; j < numInputs; j++) {
1350-
if (!should_skip(inputs[j])) {
1351-
int64_t dimSize = inputs[j]->size(dimension);
1352-
THTensor *nt = THTensor_(newWithTensor)(result);
1353-
THTensor_(narrow)(nt, NULL, dimension, offset, dimSize);
1354-
at::Tensor nt__wrap = THTensor_wrap(nt);
1355-
at::Tensor inputs_wrap = THTensor_wrap(inputs[j]);
1356-
at::_copy_same_type_(nt__wrap, inputs_wrap);
1357-
c10::raw::intrusive_ptr::decref(nt);
1358-
offset += dimSize;
1359-
}
1360-
}
1361-
}
1362-
}
1363-
13641241
int THTensor_(equal)(THTensor *ta, THTensor* tb)
13651242
{
13661243
int equal = 1;

test/common_utils.py (+21 -15)

@@ -392,22 +392,28 @@ def assertEqual(self, x, y, prec=None, message='', allow_inf=False):
         def assertTensorsEqual(a, b):
             super(TestCase, self).assertEqual(a.size(), b.size(), message)
             if a.numel() > 0:
-                b = b.type_as(a)
-                b = b.cuda(device=a.get_device()) if a.is_cuda else b.cpu()
-                # check that NaNs are in the same locations
-                nan_mask = a != a
-                self.assertTrue(torch.equal(nan_mask, b != b), message)
+                if a.device.type == 'cpu' and a.dtype == torch.float16:
+                    # CPU half tensors don't have the methods we need below
+                    a = a.to(torch.float32)
+                if TEST_WITH_ROCM:
+                    # Workaround for bug https://github.com/pytorch/pytorch/issues/16448
+                    # TODO: remove after the bug is resolved.
+                    b = b.to(a.dtype).to(a.device)
+                else:
+                    b = b.to(a)
                 diff = a - b
-                diff[nan_mask] = 0
-                # inf check if allow_inf=True
-                if allow_inf:
-                    inf_mask = (a == float("inf")) | (a == float("-inf"))
-                    self.assertTrue(torch.equal(inf_mask,
-                                                (b == float("inf")) | (b == float("-inf"))),
-                                    message)
-                    diff[inf_mask] = 0
-                # TODO: implement abs on CharTensor
-                if diff.is_signed() and 'CharTensor' not in diff.type():
+                if a.is_floating_point():
+                    # check that NaNs are in the same locations
+                    nan_mask = torch.isnan(a)
+                    self.assertTrue(torch.equal(nan_mask, torch.isnan(b)), message)
+                    diff[nan_mask] = 0
+                    # inf check if allow_inf=True
+                    if allow_inf:
+                        inf_mask = torch.isinf(a)
+                        self.assertTrue(torch.equal(inf_mask, torch.isinf(b)), message)
+                        diff[inf_mask] = 0
+                # TODO: implement abs on CharTensor (int8)
+                if diff.is_signed() and diff.dtype != torch.int8:
                     diff = diff.abs()
                 max_err = diff.max()
                 self.assertLessEqual(max_err, prec, message)
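
Read as a standalone function, the updated comparison amounts to the following sketch; max_abs_err is a hypothetical name (the real logic lives inside assertTensorsEqual and uses unittest assertions), and it assumes a and b already have equal shapes:

    import torch

    def max_abs_err(a, b, allow_inf=False):
        if a.device.type == 'cpu' and a.dtype == torch.float16:
            a = a.to(torch.float32)  # CPU half lacked some of the ops below at this commit
        b = b.to(a)                  # match b to a's dtype and device
        diff = a - b
        if a.is_floating_point():
            # NaNs (and optionally infs) must line up; zero them out of the diff.
            nan_mask = torch.isnan(a)
            assert torch.equal(nan_mask, torch.isnan(b)), "NaNs in different locations"
            diff[nan_mask] = 0
            if allow_inf:
                inf_mask = torch.isinf(a)
                assert torch.equal(inf_mask, torch.isinf(b)), "infs in different locations"
                diff[inf_mask] = 0
        if diff.is_signed() and diff.dtype != torch.int8:
            diff = diff.abs()        # abs skipped for int8, per the TODO above
        return diff.max()

    # Example: NaNs in matching positions are ignored.
    x = torch.tensor([1.0, float('nan'), 3.0])
    y = torch.tensor([1.5, float('nan'), 3.0])
    print(max_abs_err(x, y))  # tensor(0.5000)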
