Skip to content

Commit

Permalink
[libc] Partially implement 'rand' for the GPU
Browse files Browse the repository at this point in the history
Summary:
This patch partially implements the `rand` function on the GPU. This is
partial because the GPU currently doesn't support thread local storage
or static initializers. To implement this on the GPU, I use 1/8th of the
local / shared memory quota to treat the shared memory as thread local
storage. This is done by simply allocating enough storage for each
thread in the block and indexing into this based off of the thread id.
The downside to this is that it does not initialize the seed to `1`, as the
standard requires when `srand` is never called; it is also wasteful. In the future we
should figure out a way to support TLS on the GPU so that this can be
completely common and less resource intensive.
  • Loading branch information
jhuber6 committed Oct 5, 2023
1 parent 75e6480 commit 970880c
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 4 deletions.
2 changes: 2 additions & 0 deletions libc/config/gpu/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdlib.lldiv
libc.src.stdlib.qsort
libc.src.stdlib.qsort_r
libc.src.stdlib.rand
libc.src.stdlib.srand
libc.src.stdlib.strtod
libc.src.stdlib.strtof
libc.src.stdlib.strtol
Expand Down
10 changes: 6 additions & 4 deletions libc/src/stdlib/rand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ namespace LIBC_NAMESPACE {
// An implementation of the xorshift64star pseudo random number generator. This
// is a good general purpose generator for most non-cryptographic applications.
//
// Advances the generator state held in 'rand_next' by one step and returns the
// upper half of the scrambled state, masked with RAND_MAX.
LLVM_LIBC_FUNCTION(int, rand, (void)) {
  // Work on a local copy so that 'rand_next' is read exactly once and written
  // exactly once. Only a conversion to the value type and a plain assignment
  // are needed this way, which is all the GPU 'ThreadLocal' stand-in offers.
  unsigned long x = rand_next;
  x ^= x >> 12;
  x ^= x << 25;
  x ^= x >> 27;
  rand_next = x;
  // NOTE(review): the 64-bit multiplier and the shift by 32 presume that
  // 'unsigned long' is 64 bits wide -- confirm for every supported target.
  return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
}

} // namespace LIBC_NAMESPACE
6 changes: 6 additions & 0 deletions libc/src/stdlib/rand_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@

namespace LIBC_NAMESPACE {

#ifdef LIBC_TARGET_ARCH_IS_GPU
// FIXME: Local GPU memory cannot be initialized so we cannot currently provide
// a standard compliant default value.
// 'rand' reads this value as its state, so without a prior store the sequence
// starts from whatever the uninitialized storage holds instead of from '1'.
ThreadLocal<unsigned long> rand_next;
#else
// C standard 7.10p2: If 'rand' is called before 'srand' it is to proceed as if
// the 'srand' function was called with a value of '1'.
// The initializer below provides that default state.
LIBC_THREAD_LOCAL unsigned long rand_next = 1;
#endif

} // namespace LIBC_NAMESPACE
22 changes: 22 additions & 0 deletions libc/src/stdlib/rand_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,33 @@
#ifndef LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
#define LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H

#include "src/__support/GPU/utils.h"
#include "src/__support/macros/attributes.h"

namespace LIBC_NAMESPACE {

#ifdef LIBC_TARGET_ARCH_IS_GPU
// Stand-in for thread local storage on the GPU, backed by local memory. Every
// access goes to the array slot selected by the calling thread's id, and the
// array is private to the group.
// NOTE: 'storage' is a static member, so all 'ThreadLocal<T>' objects that
// share the same 'T' refer to the same slots.
// TODO: We need to implement the 'thread_local' keyword on the GPU. This is an
// inefficient and incomplete stand-in until that is done.
template <typename T> class ThreadLocal {
public:
  // Reads the slot that belongs to the calling thread.
  LIBC_INLINE operator T() const {
    const auto slot = gpu::get_thread_id();
    return storage[slot];
  }

  // Overwrites the slot that belongs to the calling thread with 'value'.
  LIBC_INLINE void operator=(const T &value) {
    const auto slot = gpu::get_thread_id();
    storage[slot] = value;
  }

private:
  // Number of slots. Thread ids index the array without a bounds check.
  static constexpr long MAX_THREADS = 1024;
  // Marked 'loader_uninitialized': the slots carry no initial value.
  [[clang::loader_uninitialized]] static inline gpu::Local<T>
      storage[MAX_THREADS];
};

// State word of the 'rand' generator on the GPU; defined in rand_util.cpp.
extern ThreadLocal<unsigned long> rand_next;
#else
// State word of the 'rand' generator; defined in rand_util.cpp.
extern LIBC_THREAD_LOCAL unsigned long rand_next;
#endif

} // namespace LIBC_NAMESPACE

Expand Down
3 changes: 3 additions & 0 deletions libc/test/src/stdlib/rand_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@ TEST(LlvmLibcRandTest, UnsetSeed) {
vals[i] = val;
}

// FIXME: The GPU implementation cannot initialize the seed correctly.
#ifndef LIBC_TARGET_ARCH_IS_GPU
// The C standard specifies that if 'srand' is never called it should behave
// as if 'srand' was called with a value of 1. If we seed the value with 1 we
// should get the same sequence as the unseeded version.
LIBC_NAMESPACE::srand(1);
for (size_t i = 0; i < 1000; ++i)
ASSERT_EQ(LIBC_NAMESPACE::rand(), vals[i]);
#endif
}

TEST(LlvmLibcRandTest, SetSeed) {
Expand Down

0 comments on commit 970880c

Please sign in to comment.