Skip to content

Commit

Permalink
Merge pull request llvm#80 from AMD-Lightning-Internal/amd/dev/animkuma/xteam-scan-type-generic
Browse files Browse the repository at this point in the history

[OpenMP][Clang][DeviceRTL] Support Multiple Datatypes for Xteam Scan
  • Loading branch information
ronlieb authored Jan 15, 2025
2 parents 03c4267 + 755f808 commit 5eda221
Show file tree
Hide file tree
Showing 8 changed files with 3,957 additions and 55 deletions.
305 changes: 283 additions & 22 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Large diffs are not rendered by default.

57 changes: 30 additions & 27 deletions clang/lib/CodeGen/CGStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
assert(Itr != RedVarMap.end() && "Metadata not found");

const CodeGenModule::XteamRedVarInfo &RVI = Itr->second;
llvm::Type *RedVarType = ConvertTypeForMem(XteamVD->getType());

assert(RVI.ArgPos + 1 < Args->size() && "Arg position beyond bounds");

Expand All @@ -568,9 +569,9 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
// {
// RedVar += TeamVals[TeamID - 1]
// }

Address ScanStorageValGEP = Address(
Builder.CreateGEP(Int32Ty, DScanStorage, GlobalGpuThreadId), Int32Ty,
Builder.CreateGEP(RedVarType, DScanStorage, GlobalGpuThreadId),
RedVarType,
getContext().getTypeAlignInChars(
XteamVD->getType())); // Storage[GlobalTID]
Builder.CreateStore(Builder.CreateLoad(ScanStorageValGEP),
Expand All @@ -586,10 +587,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
EmitBlock(IsAfterFirstTeamThenBlock);
Address PrevTeamValGEP =
Address(Builder.CreateGEP(
Int32Ty, DTeamVals,
RedVarType, DTeamVals,
Builder.CreateSub(WorkGroupId,
llvm::ConstantInt::get(Int32Ty, 1))),
Int32Ty,
RedVarType,
getContext().getTypeAlignInChars(
XteamVD->getType())); // TeamVals[TeamID - 1]
Builder.CreateStore(Builder.CreateAdd(Builder.CreateLoad(RVI.RedVarAddr),
Expand All @@ -614,7 +615,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
// }
// }

Builder.CreateStore(llvm::ConstantInt::get(Int32Ty, 0),
Builder.CreateStore(llvm::ConstantInt::get(RedVarType, 0),
RVI.RedVarAddr); // RedVar = 0
llvm::Value *IsNotFirstThread = Builder.CreateICmpUGE(
GlobalGpuThreadId,
Expand All @@ -630,8 +631,8 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
GlobalGpuThreadId,
llvm::ConstantInt::get(Int32Ty, 1)); // GlobalTID - 1
Address ScanStoragePrevValGEP = Address(
Builder.CreateGEP(Int32Ty, DScanStorage, PrevGlobalGpuThreadId),
Int32Ty,
Builder.CreateGEP(RedVarType, DScanStorage, PrevGlobalGpuThreadId),
RedVarType,
getContext().getTypeAlignInChars(
XteamVD->getType())); // Storage[GlobalTID - 1]
Builder.CreateStore(Builder.CreateLoad(ScanStoragePrevValGEP),
Expand All @@ -656,10 +657,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
EmitBlock(IsNotFirstThreadInTeamThenBlock);
Address PrevTeamValGEP =
Address(Builder.CreateGEP(
Int32Ty, DTeamVals,
RedVarType, DTeamVals,
Builder.CreateSub(WorkGroupId,
llvm::ConstantInt::get(Int32Ty, 1))),
Int32Ty,
RedVarType,
getContext().getTypeAlignInChars(
XteamVD->getType())); // TeamVals[TeamID - 1]
Builder.CreateStore(Builder.CreateAdd(Builder.CreateLoad(RVI.RedVarAddr),
Expand All @@ -676,10 +677,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
EmitBlock(IsAfterSecondTeamThenBlock);
Address PrevPrevTeamValGEP =
Address(Builder.CreateGEP(
Int32Ty, DTeamVals,
RedVarType, DTeamVals,
Builder.CreateSub(WorkGroupId,
llvm::ConstantInt::get(Int32Ty, 2))),
Int32Ty,
RedVarType,
getContext().getTypeAlignInChars(
XteamVD->getType())); // TeamVals[TeamID - 2]
Builder.CreateStore(
Expand Down Expand Up @@ -2307,12 +2308,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,

llvm::Value *SegmentLoopUB = nullptr;
llvm::Value *DSegmentVals = nullptr;
llvm::Value *ThreadLevelRes = nullptr;
llvm::Value *GlobalUpperBound = nullptr;
const Address *RedVarAddr = nullptr;
llvm::BasicBlock *ExecBB = nullptr;
llvm::BasicBlock *DoneBB = nullptr;
clang::QualType RedVarType;
const clang::VarDecl *XteamVD;
llvm::Type *RedVarType;
if (getLangOpts().OpenMPIsTargetDevice && CGM.isXteamSegmentedScanKernel()) {
// Compute Loop trip-count (N) = GlobalUB - GlobalLB + 1
const auto UBLValue = EmitLValue(
Expand Down Expand Up @@ -2368,19 +2369,19 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
Builder.CreateMul(SegmentSizeForScan, GlobalGpuThreadId),
BigJumpLoopIvAddr); // *iv = GlobalTID * Seg_Size

// Every thread loops till just before the SegmentLoopUB = (GlobalTID + 1) *
// Seg_Size
// Every thread loops till just before the SegmentLoopUB:
// SegmentLoopUB = (GlobalTID + 1) * Seg_Size
SegmentLoopUB = Builder.CreateMul(
SegmentSizeForScan,
Builder.CreateAdd(GlobalGpuThreadId,
llvm::ConstantInt::get(Int32Ty, 1)));

auto XteamVD = *(CGM.getXteamOrderedRedVar(&S).begin());
XteamVD = *(CGM.getXteamOrderedRedVar(&S).begin());
RedVarType = ConvertTypeForMem(XteamVD->getType());
const CodeGenModule::XteamRedVarMap &RedVarMap = CGM.getXteamRedVarMap(&S);
const CodeGenModule::XteamRedVarInfo &RVI =
(RedVarMap.find(XteamVD))->second;
RedVarAddr = &(RVI.RedVarAddr);
RedVarType = XteamVD->getType();

// SegmentValsAddr points to the SegmentVals array which will store the
// intermediate scan results computed per segment by a single thread
Expand Down Expand Up @@ -2520,11 +2521,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
if (!CGM.isXteamScanPhaseOne) {
// SegmentVals contains the final scanned results computed for every
// element in a segment.
Address SegmentValsGEP = Address(
Builder.CreateGEP(Int32Ty, DSegmentVals,
Builder.CreateLoad(BigJumpLoopIvAddr)),
Int32Ty,
getContext().getTypeAlignInChars(RedVarType)); // SegmentVals[*iv]
Address SegmentValsGEP =
Address(Builder.CreateGEP(RedVarType, DSegmentVals,
Builder.CreateLoad(BigJumpLoopIvAddr)),
RedVarType,
getContext().getTypeAlignInChars(
XteamVD->getType())); // SegmentVals[*iv]
// emit redvar = SegmentVals[omp.iv]
Builder.CreateStore(Builder.CreateLoad(SegmentValsGEP), *RedVarAddr);
}
Expand All @@ -2548,11 +2550,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
(CGM.isXteamRedKernel(&S) || CGM.isBigJumpLoopKernel(&S))) {
if (CGM.isXteamSegmentedScanKernel()) {
EmitBlock(Continue.getBlock());
Address SegmentValsGEP = Address(
Builder.CreateGEP(Int32Ty, DSegmentVals,
Builder.CreateLoad(BigJumpLoopIvAddr)),
Int32Ty,
getContext().getTypeAlignInChars(RedVarType)); // Segment_Vals[*iv]
Address SegmentValsGEP =
Address(Builder.CreateGEP(RedVarType, DSegmentVals,
Builder.CreateLoad(BigJumpLoopIvAddr)),
RedVarType,
getContext().getTypeAlignInChars(
XteamVD->getType())); // Segment_Vals[*iv]
Builder.CreateStore(Builder.CreateLoad(*RedVarAddr),
SegmentValsGEP); // Segment_Vals[*iv] = red_var
llvm::Value *SegmentScanLoopInc =
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,10 +415,10 @@ void CodeGenFunction::InitializeXteamRedCapturedVars(
llvm::Value *DScanStorageInst =
Builder.CreateAlloca(RedVarType, nullptr, "d_scan_storage");
Address DScanStorageAddr(
DScanStorageInst, Int32Ty,
DScanStorageInst, RedVarType,
Context.getTypeAlignInChars(Context.UnsignedIntTy));
llvm::Value *NullPtrDScanStorage =
llvm::ConstantPointerNull::get(Int32Ty->getPointerTo());
llvm::ConstantPointerNull::get(RedVarType->getPointerTo());
Builder.CreateStore(NullPtrDScanStorage, DScanStorageAddr);

assert(DScanStorageInst && "Device scan storage pointer cannot be null");
Expand All @@ -428,10 +428,10 @@ void CodeGenFunction::InitializeXteamRedCapturedVars(
llvm::Value *DSegmentValsInst =
Builder.CreateAlloca(RedVarType, nullptr, "d_segment_vals");
Address DSegmentValsAddr(
DSegmentValsInst, Int32Ty,
DSegmentValsInst, RedVarType,
Context.getTypeAlignInChars(Context.UnsignedIntTy));
llvm::Value *NullPtrDSegmentVals =
llvm::ConstantPointerNull::get(Int32Ty->getPointerTo());
llvm::ConstantPointerNull::get(RedVarType->getPointerTo());
Builder.CreateStore(NullPtrDSegmentVals, DSegmentValsAddr);

assert(DSegmentValsInst && "Segment Vals Array pointer cannot be null");
Expand Down
Loading

0 comments on commit 5eda221

Please sign in to comment.