Commit 8aa028b

use single thread on cpu
1 parent f4368dc commit 8aa028b

3 files changed: +29 -21 lines

gammagl/mpops/paddle_ext/setup.py

+11 -8

@@ -2,10 +2,10 @@
 from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup
 
 cuda_macro = ('COMPILE_WITH_CUDA', None) # Paddle offer `PADDLE_WITH_CUDA` macro
-omp_macro = ('COMPILE_WITH_OMP', None) # Note: OpenMP needs gcc>4.2.0
-compile_args = {
-    'cxx':['-fopenmp']
-}
+# omp_macro = ('COMPILE_WITH_OMP', None) # Note: OpenMP needs gcc>4.2.0
+# compile_args = {
+#     'cxx':['-fopenmp']
+# }
 
 def get_exts():
     if paddle.is_compiled_with_cuda():
@@ -16,17 +16,20 @@ def get_exts():
                 'cpu/segment_sum_cpu.cpp',
                 'cuda/segment_sum_cuda.cu',
             ],
-            define_macros=[omp_macro, cuda_macro],
-            extra_compile_args=compile_args
+            define_macros=[
+                cuda_macro,
+                # omp_macro,
+            ],
+            # extra_compile_args=compile_args
         )
     else:
         return CppExtension(
             sources=[
                 'segment_sum.cpp',
                 'cpu/segment_sum_cpu.cpp',
            ],
-            define_macros=[omp_macro],
-            extra_compile_args=compile_args
+            # define_macros=[omp_macro],
+            # extra_compile_args=compile_args
        )
 
 setup(
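
Note: with the COMPILE_WITH_OMP macro and the -fopenmp flag commented out, the paddle CPU kernel is compiled without OpenMP and runs single-threaded, which is the point of this commit. As a hypothetical alternative sketch (not part of this commit), the OpenMP settings could instead be gated behind an environment variable; the variable name GAMMAGL_WITH_OMP below is an assumption:

import os

# Opt-in OpenMP build toggle (hypothetical; not in this commit).
use_omp = os.environ.get('GAMMAGL_WITH_OMP', '0') == '1'

omp_macros = [('COMPILE_WITH_OMP', None)] if use_omp else []  # OpenMP needs gcc>4.2.0
compile_args = {'cxx': ['-fopenmp']} if use_omp else {}

# These would then be passed through as before, e.g.
#   define_macros=[cuda_macro, *omp_macros],
#   extra_compile_args=compile_args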

gammagl/mpops/torch_ext/setup.py

+18 -12

@@ -4,10 +4,10 @@
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension
 
 cuda_macro = ('COMPILE_WITH_CUDA', None)
-omp_macro = ('COMPILE_WITH_OMP', None) # Note: OpenMP needs gcc>4.2.0
-compile_args = {
-    'cxx':['-fopenmp']
-}
+# omp_macro = ('COMPILE_WITH_OMP', None) # Note: OpenMP needs gcc>4.2.0
+# compile_args = {
+#     'cxx':['-fopenmp']
+# }
 
 def get_exts():
     if torch.cuda.is_available():
@@ -19,8 +19,11 @@ def get_exts():
                     'cpu/segment_max_cpu.cpp',
                     'cuda/segment_max_cuda.cu'
                 ],
-                define_macros=[cuda_macro, omp_macro],
-                extra_compile_args=compile_args
+                define_macros=[
+                    cuda_macro,
+                    # omp_macro,
+                ],
+                # extra_compile_args=compile_args
             ),
             CUDAExtension(
                 name='torch_gspmm', # Note: same with TORCH_LIBRARY (import)
@@ -29,8 +32,11 @@ def get_exts():
                     'cpu/spmm_sum_cpu.cpp',
                     'cuda/spmm_sum_cuda.cu'
                 ],
-                define_macros=[cuda_macro, omp_macro],
-                extra_compile_args=compile_args
+                define_macros=[
+                    cuda_macro,
+                    # omp_macro,
+                ],
+                # extra_compile_args=compile_args
             )
         ]
     else:
@@ -41,17 +47,17 @@ def get_exts():
                     'segment_max.cpp',
                     'cpu/segment_max_cpu.cpp'
                 ],
-                define_macros=[omp_macro],
-                extra_compile_args=compile_args
+                # define_macros=[omp_macro],
+                # extra_compile_args=compile_args
             ),
             CppExtension(
                 name='torch_gspmm',
                 sources=[
                     'gspmm.cpp',
                     'cpu/spmm_sum_cpu.cpp'
                 ],
-                define_macros=[omp_macro],
-                extra_compile_args=compile_args
+                # define_macros=[omp_macro],
+                # extra_compile_args=compile_args
             )
         ]
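
The same pattern is applied on the PyTorch side: without COMPILE_WITH_OMP and -fopenmp, the CPU kernels of both extensions are built single-threaded. For context, a minimal sketch of how such an extension is typically registered with torch's BuildExtension; the actual setup() call in torch_ext/setup.py lies outside the hunks shown, so the package name below is a placeholder:

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CppExtension

setup(
    name='torch_ext',  # placeholder package name
    ext_modules=[
        CppExtension(
            name='torch_gspmm',  # same as the TORCH_LIBRARY name, per the note in the diff
            sources=['gspmm.cpp', 'cpu/spmm_sum_cpu.cpp'],
        ),
    ],
    cmdclass={'build_ext': BuildExtension},  # torch's build hook compiles the C++ sources
)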

profiler/mpops/paddle_ext_.py

-1

@@ -2,7 +2,6 @@
 from paddle_ext import unsorted_segment_sum
 
 src = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32', stop_gradient=False)
-## TODO: it still successfully run, but it will get wrong answer on GPU.
 # src = paddle.to_tensor([1, 2, 3, 4, 5, 6], dtype='float32').reshape((2, 3))
 index = paddle.to_tensor([0, 1, 0], dtype=paddle.int64)
 out = unsorted_segment_sum(src, index, 3)
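
For reference, with index [0, 1, 0] and 3 segments, rows 0 and 2 of src are summed into segment 0, row 1 goes to segment 1, and segment 2 stays empty. A plain-Python check of the expected result (no paddle required):

src = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]
index = [0, 1, 0]
num_segments = 3

# Accumulate each row of src into the segment selected by index.
out = [[0.0, 0.0] for _ in range(num_segments)]
for row, seg in zip(src, index):
    out[seg][0] += row[0]
    out[seg][1] += row[1]

print(out)  # [[4.0, 4.0], [2.0, 2.0], [0.0, 0.0]]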
