Skip to content

Commit f4368dc

Browse files
committed
fix bug
1 parent 66b4eaf commit f4368dc

File tree

5 files changed

+6
-8
lines changed

5 files changed

+6
-8
lines changed

gammagl/mpops/paddle.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def unsorted_segment_sum(x, segment_ids, num_segments=None):
1818
else:
1919
num_segments = pd.max(segment_ids)+1
2020
if use_ext:
21-
return paddle_segment.segment_sum(x, segment_ids, num_segments)
21+
return paddle_ext.segment_sum(x, segment_ids, num_segments)
2222
idx_ = pd.argsort(segment_ids)
2323
x = pd.gather(x, idx_)
2424
segment_ids = pd.gather(segment_ids, idx_)

gammagl/mpops/paddle_ext/cuda/segment_sum_cuda.cu

-2
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@ __global__ void segment_sum_cuda_forward_kernel(const data_t *x_data, const int6
1212
int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
1313
int64_t e = (thread_idx / K) % E;
1414
int64_t k = thread_idx % K;
15-
printf(" thread_idx = %d \n", thread_idx);
1615
if (thread_idx < numel) {
1716
int64_t idx = index_data[e];
18-
printf("%f \n", *out_data);
1917
atomicAdd(out_data + idx * K + k,
2018
x_data[thread_idx]);
2119
}

gammagl/mpops/paddle_ext/readme.md

+2
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ Compile Steps: (CMake, not work)
55
TODO: support cmake
66

77
> In `paddle/utils/cpp_extension/extension_utils.py:341L`, the flags `"-ccbin"` & `"cc"` may cause errors: Paddle needs `nvcc` to compile with a higher C++ standard, but when you are using an older version of gcc these flags can prevent a flag like `-std=c++14` from taking effect. Besides, on Linux we usually recommend setting the gcc path in `CC` rather than `cc`, so using `cc` will also cause an error. Just comment them out.
8+
9+
> Please keep the versions of nvcc and paddle-cuda consistent; otherwise the error `the provided PTX was compiled with an unsupported toolchain.` may occur.

gammagl/mpops/torch_ext/cpu/segment_max_cpu.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,13 @@ std::tuple<torch::Tensor, torch::Tensor> segment_max_cpu_forward(torch::Tensor&
5050
for (auto e = 0; e < E; ++e) {
5151
idx = index_data[e];
5252
for (auto k = 0; k < K; ++k) {
53-
if (out_data[idx * K + k] < x_data[e * K + k]) {
5453
#ifdef COMPILE_WITH_OMP
55-
#pragma omp atomic
54+
#pragma omp critical
5655
#endif
57-
{
56+
if (out_data[idx * K + k] < x_data[e * K + k]) {
5857
out_data[idx * K + k] = x_data[e * K + k];
5958
arg_out_data[idx * K + k] = e;
6059
}
61-
}
6260
}
6361
}
6462
out.masked_fill_(out == std::numeric_limits<int64_t>::lowest(), (scalar_t)0);

profiler/mpops/paddle_ext_.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from paddle_ext import unsorted_segment_sum
33

44
src = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32', stop_gradient=False)
5-
## TODO: it still successfully run, but it will get wrong answer.
5+
## TODO: it still runs successfully, but it gets the wrong answer on GPU.
66
# src = paddle.to_tensor([1, 2, 3, 4, 5, 6], dtype='float32').reshape((2, 3))
77
index = paddle.to_tensor([0, 1, 0], dtype=paddle.int64)
88
out = unsorted_segment_sum(src, index, 3)

0 commit comments

Comments
 (0)