[OpenMP][MLIR] Descriptor explicit member map lowering changes #96265
Conversation
Created using spr 1.3.4
@llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-mlir-openmp

Author: None (agozillon)

Changes

This is one of 3 PRs in a PR stack that aims to add support for explicit mapping of allocatable members in derived types.

The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes, which are small and seek to alter the current member mapping to add an additional map insertion for pointers. Effectively, if the member is a pointer (currently indicated by having a varPtrPtr field) we add an additional map for the pointer and then alter the subsequent mapping of the member (the data) to utilise the member rather than the parent's base pointer. This appears to be necessary in certain cases when mapping pointer data within record types to avoid segfaulting on device (due to incorrect data mapping). In general this record type mapping may be simplifiable in the future.

There are also additions of tests which should help to showcase the effect of the changes above.

Patch is 46.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96265.diff

3 Files Affected:
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index cbfc64972f38b..a85dd6aafedad 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2245,10 +2245,13 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
int firstMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
- lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
- builder.getPtrTy());
int lastMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
+
+ // NOTE/TODO: Should perhaps use OriginalValue here instead of Pointers to
+ // avoid offset or any manipulations interfering with the calculation.
+ lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
+ builder.getPtrTy());
highAddr = builder.CreatePointerCast(
builder.CreateGEP(mapData.BaseType[lastMemberIdx],
mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
@@ -2331,6 +2334,24 @@ static void processMapMembersWithParent(
assert(memberDataIdx >= 0 && "could not find mapped member of structure");
+ if (checkIfPointerMap(memberClause)) {
+ auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
+ memberClause.getMapType().value());
+ mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+ combinedInfo.Types.emplace_back(mapFlag);
+ combinedInfo.DevicePointers.emplace_back(
+ llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+ combinedInfo.Names.emplace_back(
+ LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[mapDataIndex]);
+ combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
+ combinedInfo.Sizes.emplace_back(builder.getInt64(
+ moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
+ }
+
// Same MemberOfFlag to indicate its link with parent and other members
// of.
auto mapFlag =
@@ -2346,7 +2367,14 @@ static void processMapMembersWithParent(
llvm::OpenMPIRBuilder::DeviceInfoTy::None);
combinedInfo.Names.emplace_back(
LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
- combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+
+ if (checkIfPointerMap(memberClause))
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[memberDataIdx]);
+ else
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[mapDataIndex]);
+
combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
}
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
new file mode 100644
index 0000000000000..e36caefe9afc4
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
@@ -0,0 +1,333 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This test checks the offload sizes, map types and base pointers and pointers
+// provided to the OpenMP kernel argument structure are correct when lowering
+// to LLVM-IR from MLIR when performing explicit member mapping of a record type
+// that includes fortran allocatables in various locations of the record types
+// hierarchy.
+
+module attributes {omp.is_target_device = false} {
+ llvm.func @omp_map_derived_type_allocatable_member(%arg0: !llvm.ptr) {
+ %0 = llvm.mlir.constant(4 : index) : i64
+ %1 = llvm.mlir.constant(1 : index) : i64
+ %2 = llvm.mlir.constant(0 : index) : i64
+ %3 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%2 : i64) {stride_in_bytes = true}
+ %4 = llvm.getelementptr %arg0[0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+ %5 = llvm.getelementptr %4[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %6 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) var_ptr_ptr(%5 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%3) -> !llvm.ptr {name = ""}
+ %7 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%array_j"}
+ %8 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%7, %6 : [4,-1], [4,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true}
+ omp.target map_entries(%7 -> %arg1, %6 -> %arg2, %8 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr):
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @omp_allocatable_derived_type_member_map(%arg0: !llvm.ptr) {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %2 = llvm.mlir.constant(1 : i32) : i32
+ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.mlir.constant(5 : index) : i64
+ %5 = llvm.mlir.constant(4 : index) : i64
+ %6 = llvm.mlir.constant(1 : index) : i64
+ %7 = llvm.mlir.constant(0 : index) : i64
+ %8 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%5 : i64) extent(%5 : i64) stride(%6 : i64) start_idx(%7 : i64) {stride_in_bytes = true}
+ %9 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ llvm.store %9, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr
+ %10 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ %11 = llvm.load %10 : !llvm.ptr -> !llvm.ptr
+ %12 = llvm.getelementptr %11[0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+ %13 = llvm.getelementptr %12[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %14 = omp.map.info var_ptr(%12 : !llvm.ptr, i32) var_ptr_ptr(%13 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%8) -> !llvm.ptr {name = ""}
+ %15 = omp.map.info var_ptr(%12 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%array_j"}
+ %16 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ llvm.store %16, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr
+ %17 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ %18 = llvm.load %17 : !llvm.ptr -> !llvm.ptr
+ %19 = llvm.getelementptr %18[0, 5] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+ %20 = omp.map.info var_ptr(%19 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%k"}
+ %21 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ %22 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_allocatable_derived_type_map_operand_and_block_additionTone_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>) var_ptr_ptr(%21 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %23 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>) map_clauses(tofrom) capture(ByRef) members(%22, %15, %14, %20 : [0,-1,-1], [0,4,-1], [0,4,0], [0,5,-1] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l"}
+ omp.target map_entries(%22 -> %arg1, %15 -> %arg2, %14 -> %arg3, %20 -> %arg4, %23 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr, %arg4: !llvm.ptr, %arg5: !llvm.ptr):
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @omp_alloca_nested_derived_type_map(%arg0: !llvm.ptr) {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %2 = llvm.mlir.constant(1 : i32) : i32
+ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.mlir.constant(3 : index) : i64
+ %5 = llvm.mlir.constant(4 : index) : i64
+ %6 = llvm.mlir.constant(6 : index) : i64
+ %7 = llvm.mlir.constant(1 : index) : i64
+ %8 = llvm.mlir.constant(2 : index) : i64
+ %9 = llvm.mlir.constant(0 : index) : i64
+ %10 = omp.map.bounds lower_bound(%9 : i64) upper_bound(%5 : i64) extent(%5 : i64) stride(%7 : i64) start_idx(%9 : i64) {stride_in_bytes = true}
+ %11 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ llvm.store %11, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr
+ %12 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ %13 = llvm.load %12 : !llvm.ptr -> !llvm.ptr
+ %14 = llvm.getelementptr %13[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>
+ %15 = llvm.getelementptr %14[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+ %16 = llvm.getelementptr %15[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %17 = omp.map.info var_ptr(%15 : !llvm.ptr, i32) var_ptr_ptr(%16 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr {name = ""}
+ %18 = omp.map.info var_ptr(%15 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"}
+ %19 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ llvm.store %19, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>, !llvm.ptr
+ %20 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ %21 = llvm.load %20 : !llvm.ptr -> !llvm.ptr
+ %22 = llvm.getelementptr %21[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>
+ %23 = llvm.getelementptr %22[0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+ %24 = omp.map.info var_ptr(%23 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%k"}
+ %25 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
+ %26 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) var_ptr_ptr(%25 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %27 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>) map_clauses(tofrom) capture(ByRef) members(%26, %18, %17, %24 : [0,-1,-1,-1], [0,6,2,-1], [0,6,2,0], [0,6,3,-1] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l"}
+ omp.target map_entries(%26 -> %arg1, %18 -> %arg2, %17 -> %arg3, %24 -> %arg4, %27 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr, %arg4: !llvm.ptr, %arg5: !llvm.ptr):
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @omp_nested_derived_type_alloca_map(%arg0: !llvm.ptr) {
+ %0 = llvm.mlir.constant(4 : index) : i64
+ %1 = llvm.mlir.constant(1 : index) : i64
+ %2 = llvm.mlir.constant(2 : index) : i64
+ %3 = llvm.mlir.constant(0 : index) : i64
+ %4 = llvm.mlir.constant(6 : index) : i64
+ %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%3 : i64) {stride_in_bytes = true}
+ %6 = llvm.getelementptr %arg0[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>
+ %7 = llvm.getelementptr %6[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+ %8 = llvm.getelementptr %7[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %9 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) var_ptr_ptr(%8 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""}
+ %10 = omp.map.info var_ptr(%7 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"}
+ %11 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [6,2,-1], [6,2,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true}
+ omp.target map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr):
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 40, i64 8, i64 136, i64 48, i64 8, i64 20, i64 4]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 281474976710675, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 40, i64 8, i64 240, i64 48, i64 8, i64 20, i64 4]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 281474976710675, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 281474976710659]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675]
+
+// CHECK: define void @omp_map_derived_type_allocatable_member(ptr %[[ARG:.*]]) {
+
+// CHECK: %[[DTYPE_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer, ptr %[[ARG]], i32 0, i32 4
+// CHECK: %[[ALLOCATABLE_MEMBER_BADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0
+
+// CHECK: %[[LOAD_ALLOCATABLE_MEMBER_BADDR:.*]] = load ptr, ptr %[[ALLOCATABLE_MEMBER_BADDR]], align 8
+// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ALLOCATABLE_MEMBER_BADDR]], i64 0
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], i64 1
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]] to i64
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]]
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[DTYPE_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8
+
+// CHECK: %[[BASE_PTRS:.*...
[truncated]
Please reduce this test to the absolute minimum that still tests your change. The IR does not need to compute anything useful, but the test should fail if only your C++ changes are reverted.
Additionally, try to reduce the CHECKs to a minimum to not make the test exceptionally sensitive to all kinds of changes.
Hi, I believe in this case it is already close to minimal. I could reduce it to 1 of the 4 test cases if you wish (keeping the most complicated), or if you have any other suggestions on how to reduce it I would be more than happy to adjust the test.
The test checks that the correct information is accessed and inserted into the OpenMP map information construct, so it unfortunately requires a number of CHECKs; most of the code generation is handled via OpenMPToLLVMIRTranslation.cpp and OMPIRBuilder.cpp (although it is of course susceptible to modifications in the MLIR -> LLVM-IR lowering and LLVM changes, like most other tests). The MLIR itself requires a number of omp.map.info operations to test it, with a somewhat sane approximation of what it would expect as an argument (in this case accesses into a larger structure). The bounds aren't entirely necessary for the example though (they mainly check that the array offsets still work in conjunction with everything else), so I could remove them if you wish, which might lower the number of constants as well.
…t map type when creating secondary parent mapping (TODO note left for other changes still required) Created using spr 1.3.4
llvm#96265 changes are required for this to work.
[OpenMP] Fix use_device_ptr(addr) mappings for Fortran Pointer types. Fixes test in https://ontrack-internal.amd.com/browse/SWDEV-471469. This fix also depends on @agozillon's patch llvm#96265.
This is one of 3 PRs in a PR stack that aims to add support for explicit mapping of allocatable members in derived types. The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes, which are small and seek to alter the current member mapping to add an additional map insertion for pointers. Effectively, if the member is a pointer (currently indicated by having a varPtrPtr field) we add an additional map for the pointer and then alter the subsequent mapping of the member (the data) to utilise the member rather than the parent's base pointer. This appears to be necessary in certain cases when mapping pointer data within record types to avoid segfaulting on device (due to incorrect data mapping). In general this record type mapping may be simplifiable in the future. There are also additions of tests which should help to showcase the effect of the changes above. Pull Request: llvm#96265
Thank you Andrew, this looks reasonable to me, though I'd suggest waiting for another approval before merging.
Also, it looks like the flang/test/Integration/OpenMP/map-types-and-sizes.f90 test needs to be updated.
LGTM with the other comments addressed :)
Thank you, I'll add the comment and simplify the test as previously requested by another reviewer :-) However, I believe the test doesn't need to be updated; it's just a side effect of this being the middle PR of the stack! That test unfortunately is sensitive to both MLIR -> LLVM-IR changes and Flang -> MLIR changes, as it tests the full lowering process, and as it's in Flang's test infrastructure it's in the Flang portion of the changes. You should see it pass (alongside all other tests) in: #96266, which is built on top of this via SPR!
Created using spr 1.3.4
Thank you for the approvals and comments! I updated the PR to add the requested comment and shortened the test to a more minimal form, as requested.
Likely also worth stating that the PR has some further modifications to swap from DenseIntElementsAttr -> ArrayAttr<ArrayAttr>, which simplifies the implementation a fair bit by no longer requiring the -1 padding (thank you very much for the suggestions @skatrak :-) )! So it's perhaps worth another skim over the PR.
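For illustration, here is a minimal sketch of the new member-index representation built with an MLIR Builder. The index values ([4] for the descriptor and [4,0] for its base address) are taken from the first test above; the helper name and surrounding builder code are assumptions rather than the PR's actual implementation.

#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/MLIRContext.h"

// Sketch only: the members index becomes an ArrayAttr of ArrayAttrs, so
// shorter index paths no longer need the -1 padding a rectangular
// DenseIntElementsAttr required (e.g. [4,-1] and [4,0] become [4] and [4,0]).
static mlir::ArrayAttr buildMembersIndex(mlir::MLIRContext &ctx) {
  mlir::Builder b(&ctx);
  mlir::ArrayAttr arrayJ = b.getArrayAttr({b.getI64IntegerAttr(4)});
  mlir::ArrayAttr arrayJBaseAddr =
      b.getArrayAttr({b.getI64IntegerAttr(4), b.getI64IntegerAttr(0)});
  return b.getArrayAttr({arrayJ, arrayJBaseAddr});
}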
Created using spr 1.3.4
This is one of 3 PRs in a PR stack that aims to add support for explicit mapping of allocatable members in derived types. The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes, which are small and seek to alter the current member mapping to add an additional map insertion for pointers. Effectively, if the member is a pointer (currently indicated by having a varPtrPtr field) we add an additional map for the pointer and then alter the subsequent mapping of the member (the data) to utilise the member rather than the parent's base pointer. This appears to be necessary in certain cases when mapping pointer data within record types to avoid segfaulting on device (due to incorrect data mapping). In general this record type mapping may be simplifiable in the future. There are also additions of tests which should help to showcase the effect of the changes above. Pull Request: llvm#96265
Small ping for some reviewer attention on this PR please, if at all possible. Just to make sure the current PR approvals stand after recent modifications. Thank you ahead of time as always! :-)
Thank you Andrew for working on my previous comments. I have a couple of suggestions to hopefully help simplify things a bit further, but it LGTM. No need to wait for another look by me before merging.
-                              DenseIntElementsAttr &membersIdx) {
-  SmallVector<APInt> values;
+                              ArrayAttr &membersIdx) {
+  SmallVector<Attribute, 4> values, memberIdxs;
Nit: Is there a reason for the magic number "4" here? If not, it's generally preferred to leave the default (or zero, if it refuses to give you a compile-time default).
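A tiny illustration of this nit (hypothetical code, not taken from the PR): the inline element count can usually be left to SmallVector's default, or set explicitly to 0 when the default cannot be computed at compile time.

#include "llvm/ADT/SmallVector.h"
#include "mlir/IR/Attributes.h"

void inlineSizeExamples() {
  // Preferred: let SmallVector pick a sensible inline capacity for the type.
  llvm::SmallVector<mlir::Attribute> values;
  // Explicit count only when required, e.g. 0 if the element type is too
  // large for SmallVector to choose a compile-time default.
  llvm::SmallVector<mlir::Attribute, 0> heapOnly;
  (void)values;
  (void)heapOnly;
}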
   if (!membersIdx)
     return;

-  for (int i = 0; i < shape[0]; ++i) {
+  for (size_t i = 0; i < membersIdx.getValue().size(); i++) {
Nit: I think it would be useful to use llvm::join for the inner loop to improve readability. Then, using llvm::enumerate for the outer loop would probably help as well.
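A rough sketch of what that suggestion could look like. The surrounding printer code is not visible in this excerpt, so the function name, the output stream, and the assumption that membersIdx is an ArrayAttr of ArrayAttrs of IntegerAttrs are illustrative only.

#include <string>

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/BuiltinAttributes.h"

// Sketch: print member index paths such as "[4], [4,0]", using
// llvm::enumerate for the outer loop and llvm::join for the inner one.
static void printMembersIndex(llvm::raw_ostream &os, mlir::ArrayAttr membersIdx) {
  for (auto member : llvm::enumerate(membersIdx)) {
    auto indices = mlir::cast<mlir::ArrayAttr>(member.value());
    // Stringify each index of this member's path and join them with commas.
    os << '['
       << llvm::join(llvm::map_range(indices,
                                     [](mlir::Attribute attr) {
                                       return std::to_string(
                                           mlir::cast<mlir::IntegerAttr>(attr).getInt());
                                     }),
                     ",")
       << ']';
    if (member.index() + 1 != membersIdx.size())
      os << ", ";
  }
}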
  size_t smallestMember = memberIndicesA.size() < memberIndicesB.size()
                              ? memberIndicesA.size()
                              : memberIndicesB.size();
  for (size_t i = 0; i < smallestMember; ++i) {
Nit: I think llvm::zip could simplify this a bit, since it already implements iterating over two ranges of potentially different sizes until the end of the shortest one is reached.
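A hedged sketch of that idea; the body of the original loop is not shown in this excerpt, so the element type and the lexicographic comparison below are assumptions, not the PR's actual code.

#include <cstdint>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// Sketch: llvm::zip stops at the end of the shorter range, replacing the
// manually computed smallestMember bound.
static bool memberIndexLessThan(llvm::ArrayRef<int64_t> memberIndicesA,
                                llvm::ArrayRef<int64_t> memberIndicesB) {
  for (auto [a, b] : llvm::zip(memberIndicesA, memberIndicesB))
    if (a != b)
      return a < b;
  // All shared positions are equal; the shorter path (the parent) sorts first.
  return memberIndicesA.size() < memberIndicesB.size();
}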
// subsequent map to the pointer, this segment of code generates the
// pointer mapping. This pointer map can in certain cases be optimised
// out as Clang currently does in its lowering, however, for the moment
Suggested change:
- // subsequent map to the pointer, this segment of code generates the
- // pointer mapping. This pointer map can in certain cases be optimised
- // out as Clang currently does in its lowering, however, for the moment
+ // subsequent map to the pointer. This segment of code generates the
+ // pointer mapping, which can in certain cases be optimised out as Clang
+ // currently does in its lowering. However, for the moment
// we do not do so, in part as we have substantially less information on
// the data being mapped at this stage; at least for the moment.
Suggested change:
- // we do not do so, in part as we have substantially less information on
- // the data being mapped at this stage; at least for the moment.
+ // we do not do so, in part as we currently have substantially less information
+ // on the data being mapped at this stage.
Going to close this current version of the PR stack and open a new one that incorporates the requested changes alongside some newer additions, adding support for indexing members at arbitrary depths and some other general fixes. I unfortunately lost track of what is in the PR stack and what isn't after working downstream on it for so long, so it is easier to start fresh and make sure nothing is missed. Incredibly sorry for the bother.
This is one of 3 PRs in a PR stack that aims to add support for explicit mapping of
allocatable members in derived types.
The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes,
which are small and seek to alter the current member mapping to add an
additional map insertion for pointers. Effectively, if the member is a pointer
(currently indicated by having a varPtrPtr field) we add an additional map for
the pointer and then alter the subsequent mapping of the member (the data)
to utilise the member rather than the parent's base pointer. This appears to be
necessary in certain cases when mapping pointer data within record types to
avoid segfaulting on device (due to incorrect data mapping). In general this
record type mapping may be simplifiable in the future.
There are also additions of tests which should help to showcase the effect
of the changes above.