7
7
8
8
#pragma once
9
9
10
- #include < cuda .h>
11
- #include < device_functions.h>
10
+ #include < hip/hip_runtime .h>
11
+ #include < hip/ device_functions.h>
12
12
13
13
namespace faiss {
14
14
namespace gpu {
15
15
16
16
#ifdef USE_ROCM
17
17
18
- #define GET_BITFIELD_U32 (OUT, VAL, POS, LEN )
19
-
20
- #define GET_BITFIELD_U64 (OUT, VAL, POS, LEN )
21
-
22
- __device__ __forceinline__ unsigned int getBitfield (
23
- unsigned int val,
24
- int pos,
25
- int len) {
26
- unsigned int ret{0 };
27
- return ret;
18
// Statement-style wrappers around getBitfield(): OUT receives LEN bits of VAL
// starting at bit POS. VAL is converted to the target width as a whole
// expression (a C-style cast in front of the bare macro argument would bind
// only to its first operand), and every argument is parenthesized so that
// arbitrary expressions can be passed safely.
#define GET_BITFIELD_U32(OUT, VAL, POS, LEN)                           \
    do {                                                               \
        (OUT) = getBitfield(static_cast<uint32_t>(VAL), (POS), (LEN)); \
    } while (0)

#define GET_BITFIELD_U64(OUT, VAL, POS, LEN)                           \
    do {                                                               \
        (OUT) = getBitfield(static_cast<uint64_t>(VAL), (POS), (LEN)); \
    } while (0)
27
+
28
+ // Taken from https://github.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/blob/rocm-5.5.0/amd_openvx/openvx/ago/ago_util_opencl.cpp#L1563
29
+ __device__ __forceinline__ uint32_t
30
+ getBitfield (uint32_t val, int pos, int len) {
31
+ if (len == 0 )
32
+ return 0 ;
33
+ if (pos + len < 32 )
34
+ return (val << (32 - pos - len)) >> (32 - len);
35
+ return val >> pos;
28
36
}
29
37
30
38
/// 64-bit counterpart of the 32-bit getBitfield(): extracts `len` bits of
/// `val` starting at bit `pos` (bit 0 is the least significant bit) and
/// returns them right-aligned and zero-extended.
///
/// Arguments outside the 64-bit word are handled the way the PTX `bfe.u64`
/// instruction defines them, so no call ever performs an out-of-range shift:
///   - only the low 8 bits of `pos` and `len` are significant;
///   - `len == 0`, or a field starting past bit 63, yields 0;
///   - a field extending past bit 63 is truncated at bit 63.
__device__ __forceinline__ uint64_t
getBitfield(uint64_t val, int pos, int len) {
    const unsigned int start = static_cast<unsigned int>(pos) & 0xffu;
    const unsigned int width = static_cast<unsigned int>(len) & 0xffu;

    // Nothing to extract. Testing `start` here also guarantees that every
    // shift below uses a count strictly smaller than 64.
    if (width == 0 || start >= 64) {
        return 0;
    }

    const uint64_t shifted = val >> start;

    // The field reaches (or runs past) the top bit: the shift alone already
    // discarded everything below it and zero-filled everything above it.
    if (width >= 64 - start) {
        return shifted;
    }

    // width < 64 here, so building the mask is well defined.
    return shifted & ((uint64_t{1} << width) - 1);
}
35
46
36
47
__device__ __forceinline__ unsigned int setBitfield (
@@ -39,6 +50,7 @@ __device__ __forceinline__ unsigned int setBitfield(
39
50
int pos,
40
51
int len) {
41
52
unsigned int ret{0 };
53
+ printf (" Runtime Error of %s: Unimplemented\n " , __PRETTY_FUNCTION__);
42
54
return ret;
43
55
}
44
56
0 commit comments