From cc3d2533cc2e4ea06981b86ede5087fbf801e789 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 16 Oct 2023 16:18:27 +0200 Subject: [PATCH 1/6] [AMDGPU] Add i1 mul patterns (#67291) i1 muls can sometimes happen after SCEV. They resulted in ISel failures because we were missing the patterns for them. Solves SWDEV-423354 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 + llvm/test/CodeGen/AMDGPU/mul.ll | 403 +++++++++++++++++----- 2 files changed, 328 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cd849560feac2..9c5b166c96522 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -769,6 +769,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // extract of relevant bits. setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal); + setOperationAction(ISD::MUL, MVT::i1, Promote); + setTargetDAGCombine({ISD::ADD, ISD::UADDO_CARRY, ISD::SUB, diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll index b4e9376d82777..da7645d5011fc 100644 --- a/llvm/test/CodeGen/AMDGPU/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/mul.ll @@ -1059,6 +1059,255 @@ entry: ret void } +define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 x i32], i1 %b) nounwind { +; SI-LABEL: s_mul_i1: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dword s4, s[0:1], 0x13 +; SI-NEXT: s_load_dword s5, s[0:1], 0x1c +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mul_i32 s4, s4, s5 +; SI-NEXT: s_and_b32 s4, s4, 1 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: s_mul_i1: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dword s4, s[0:1], 0x70 +; VI-NEXT: s_load_dword s5, s[0:1], 0x4c +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-NEXT: 
s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mul_lo_u16_e32 v0, s5, v0 +; VI-NEXT: v_and_b32_e32 v0, 1, v0 +; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: s_mul_i1: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x70 +; GFX9-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mul_lo_u16_e32 v0, s3, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: s_mul_i1: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x4c +; GFX10-NEXT: s_load_dword s3, s[0:1], 0x70 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX10-NEXT: s_mov_b32 s7, 0x31016000 +; GFX10-NEXT: s_mov_b32 s6, -1 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mul_lo_u16 v0, s2, s3 +; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: s_mul_i1: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x4c +; GFX11-NEXT: s_load_b32 s3, s[0:1], 0x70 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mul_lo_u16 v0, s2, s3 +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-NEXT: s_mov_b32 s2, -1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; EG-LABEL: s_mul_i1: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @10, KC0[], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 12, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: 
MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 72, #3 +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 108, #3 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, 0.0, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: MULLO_INT * T0.X, T1.X, T0.X, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, 1, +; EG-NEXT: LSHL * T0.W, PV.W, literal.x, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +entry: + %mul = mul i1 %a, %b + store i1 %mul, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: v_mul_i1: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_mul_lo_u32 v0, v0, v1 +; SI-NEXT: v_and_b32_e32 v0, 1, v0 +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: v_mul_i1: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ubyte v0, off, 
s[8:11], 0 +; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_mul_lo_u16_e32 v0, v0, v1 +; VI-NEXT: v_and_b32_e32 v0, 1, v0 +; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: v_mul_i1: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 +; GFX9-NEXT: s_mov_b32 s10, s6 +; GFX9-NEXT: s_mov_b32 s11, s7 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s8, s2 +; GFX9-NEXT: s_mov_b32 s9, s3 +; GFX9-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; GFX9-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4 +; GFX9-NEXT: s_mov_b32 s4, s0 +; GFX9-NEXT: s_mov_b32 s5, s1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: v_mul_i1: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10-NEXT: s_mov_b32 s6, -1 +; GFX10-NEXT: s_mov_b32 s7, 0x31016000 +; GFX10-NEXT: s_mov_b32 s10, s6 +; GFX10-NEXT: s_mov_b32 s11, s7 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_mov_b32 s8, s2 +; GFX10-NEXT: s_mov_b32 s9, s3 +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; GFX10-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4 +; GFX10-NEXT: s_mov_b32 s4, s0 +; GFX10-NEXT: s_mov_b32 s5, s1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: v_mul_i1: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11-NEXT: s_mov_b32 s6, -1 +; GFX11-NEXT: s_mov_b32 s7, 0x31016000 +; GFX11-NEXT: s_mov_b32 s10, s6 +; GFX11-NEXT: s_mov_b32 s11, s7 +; GFX11-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s8, s2 +; GFX11-NEXT: s_mov_b32 s9, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: buffer_load_u8 v0, off, s[8:11], 0 +; GFX11-NEXT: buffer_load_u8 v1, off, s[8:11], 0 offset:4 +; GFX11-NEXT: s_mov_b32 s4, s0 +; GFX11-NEXT: s_mov_b32 s5, s1 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-NEXT: buffer_store_b8 v0, off, s[4:7], 0 +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; EG-LABEL: v_mul_i1: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 12, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 4, #1 +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: MULLO_INT * T0.X, T0.X, T1.X, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, 1, +; EG-NEXT: LSHL * T0.W, PV.W, literal.x, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +entry: + %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %a = load i1, ptr addrspace(1) %in + %b = load i1, ptr addrspace(1) %b_ptr + %result = mul i1 %a, %b + store i1 %result, ptr addrspace(1) %out + ret void +} + ; A standard 64-bit multiply. The expansion should be around 6 instructions. 
; It would be difficult to match the expansion correctly without writing ; a really complicated list of FileCheck expressions. I don't want @@ -1213,7 +1462,7 @@ define amdgpu_kernel void @v_mul_i64(ptr addrspace(1) %out, ptr addrspace(1) %ap ; SI-NEXT: v_mul_hi_u32 v4, v2, v0 ; SI-NEXT: v_mul_lo_u32 v3, v3, v0 ; SI-NEXT: v_mul_lo_u32 v0, v2, v0 -; SI-NEXT: v_add_i32_e32 v1, vcc, v4, v1 +; SI-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; SI-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm @@ -1367,30 +1616,30 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s2, 0 -; SI-NEXT: s_cbranch_scc0 .LBB11_2 +; SI-NEXT: s_cbranch_scc0 .LBB13_2 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_mul_i32 s6, s2, s3 ; SI-NEXT: s_mov_b64 s[4:5], 0 -; SI-NEXT: s_branch .LBB11_3 -; SI-NEXT: .LBB11_2: +; SI-NEXT: s_branch .LBB13_3 +; SI-NEXT: .LBB13_2: ; SI-NEXT: s_mov_b64 s[4:5], -1 ; SI-NEXT: ; implicit-def: $sgpr6 -; SI-NEXT: .LBB11_3: ; %Flow +; SI-NEXT: .LBB13_3: ; %Flow ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 vcc, vcc -; SI-NEXT: s_cbranch_vccnz .LBB11_5 +; SI-NEXT: s_cbranch_vccnz .LBB13_5 ; SI-NEXT: ; %bb.4: ; %if ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; SI-NEXT: s_branch .LBB11_6 -; SI-NEXT: .LBB11_5: +; SI-NEXT: s_branch .LBB13_6 +; SI-NEXT: .LBB13_5: ; SI-NEXT: v_mov_b32_e32 v0, s6 -; SI-NEXT: .LBB11_6: ; %endif +; SI-NEXT: .LBB13_6: ; %endif ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -1402,18 +1651,18 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; VI-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_lg_u32 s2, 0 -; VI-NEXT: s_cbranch_scc0 .LBB11_2 +; VI-NEXT: s_cbranch_scc0 .LBB13_2 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: s_mul_i32 s6, s2, s3 ; VI-NEXT: s_mov_b64 s[4:5], 0 -; VI-NEXT: s_branch .LBB11_3 -; VI-NEXT: .LBB11_2: +; VI-NEXT: s_branch .LBB13_3 +; VI-NEXT: .LBB13_2: ; VI-NEXT: s_mov_b64 s[4:5], -1 ; VI-NEXT: ; implicit-def: $sgpr6 -; VI-NEXT: .LBB11_3: ; %Flow +; VI-NEXT: .LBB13_3: ; %Flow ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_andn2_b64 vcc, exec, s[4:5] -; VI-NEXT: s_cbranch_vccnz .LBB11_5 +; VI-NEXT: s_cbranch_vccnz .LBB13_5 ; VI-NEXT: ; %bb.4: ; %if ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -1421,10 +1670,10 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; VI-NEXT: s_mov_b32 s4, s2 ; VI-NEXT: s_mov_b32 s5, s3 ; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; VI-NEXT: s_branch .LBB11_6 -; VI-NEXT: .LBB11_5: +; VI-NEXT: s_branch .LBB13_6 +; VI-NEXT: .LBB13_5: ; VI-NEXT: v_mov_b32_e32 v0, s6 -; VI-NEXT: .LBB11_6: ; %endif +; VI-NEXT: .LBB13_6: ; %endif ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -1437,18 +1686,18 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_cmp_lg_u32 s2, 0 -; GFX9-NEXT: s_cbranch_scc0 .LBB11_2 +; GFX9-NEXT: s_cbranch_scc0 .LBB13_2 ; GFX9-NEXT: ; %bb.1: ; %else ; GFX9-NEXT: s_mul_i32 s6, s2, s3 ; GFX9-NEXT: s_mov_b64 s[4:5], 0 -; GFX9-NEXT: s_branch .LBB11_3 -; GFX9-NEXT: .LBB11_2: +; GFX9-NEXT: s_branch .LBB13_3 +; GFX9-NEXT: .LBB13_2: ; GFX9-NEXT: s_mov_b64 s[4:5], -1 ; GFX9-NEXT: ; implicit-def: $sgpr6 -; GFX9-NEXT: .LBB11_3: ; %Flow +; GFX9-NEXT: .LBB13_3: ; %Flow ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_andn2_b64 vcc, exec, s[4:5] -; GFX9-NEXT: s_cbranch_vccnz .LBB11_5 +; GFX9-NEXT: 
s_cbranch_vccnz .LBB13_5 ; GFX9-NEXT: ; %bb.4: ; %if ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 @@ -1456,10 +1705,10 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_mov_b32 s4, s2 ; GFX9-NEXT: s_mov_b32 s5, s3 ; GFX9-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; GFX9-NEXT: s_branch .LBB11_6 -; GFX9-NEXT: .LBB11_5: +; GFX9-NEXT: s_branch .LBB13_6 +; GFX9-NEXT: .LBB13_5: ; GFX9-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NEXT: .LBB11_6: ; %endif +; GFX9-NEXT: .LBB13_6: ; %endif ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 @@ -1473,17 +1722,17 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_lg_u32 s2, 0 -; GFX10-NEXT: s_cbranch_scc0 .LBB11_2 +; GFX10-NEXT: s_cbranch_scc0 .LBB13_2 ; GFX10-NEXT: ; %bb.1: ; %else ; GFX10-NEXT: s_mul_i32 s5, s2, s3 -; GFX10-NEXT: s_branch .LBB11_3 -; GFX10-NEXT: .LBB11_2: +; GFX10-NEXT: s_branch .LBB13_3 +; GFX10-NEXT: .LBB13_2: ; GFX10-NEXT: s_mov_b32 s4, -1 ; GFX10-NEXT: ; implicit-def: $sgpr5 -; GFX10-NEXT: .LBB11_3: ; %Flow +; GFX10-NEXT: .LBB13_3: ; %Flow ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX10-NEXT: s_andn2_b32 vcc_lo, exec_lo, s4 -; GFX10-NEXT: s_cbranch_vccnz .LBB11_5 +; GFX10-NEXT: s_cbranch_vccnz .LBB13_5 ; GFX10-NEXT: ; %bb.4: ; %if ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; GFX10-NEXT: s_mov_b32 s6, -1 @@ -1491,10 +1740,10 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_mov_b32 s4, s2 ; GFX10-NEXT: s_mov_b32 s5, s3 ; GFX10-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; GFX10-NEXT: s_branch .LBB11_6 -; GFX10-NEXT: .LBB11_5: +; GFX10-NEXT: s_branch .LBB13_6 +; GFX10-NEXT: .LBB13_5: ; GFX10-NEXT: v_mov_b32_e32 v0, s5 -; GFX10-NEXT: .LBB11_6: ; %endif +; GFX10-NEXT: .LBB13_6: ; %endif ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: 
s_mov_b32 s3, 0x31016000 ; GFX10-NEXT: s_mov_b32 s2, -1 @@ -1508,17 +1757,17 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_cmp_lg_u32 s2, 0 -; GFX11-NEXT: s_cbranch_scc0 .LBB11_2 +; GFX11-NEXT: s_cbranch_scc0 .LBB13_2 ; GFX11-NEXT: ; %bb.1: ; %else ; GFX11-NEXT: s_mul_i32 s5, s2, s3 -; GFX11-NEXT: s_branch .LBB11_3 -; GFX11-NEXT: .LBB11_2: +; GFX11-NEXT: s_branch .LBB13_3 +; GFX11-NEXT: .LBB13_2: ; GFX11-NEXT: s_mov_b32 s4, -1 ; GFX11-NEXT: ; implicit-def: $sgpr5 -; GFX11-NEXT: .LBB11_3: ; %Flow +; GFX11-NEXT: .LBB13_3: ; %Flow ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-NEXT: s_cbranch_vccnz .LBB11_5 +; GFX11-NEXT: s_cbranch_vccnz .LBB13_5 ; GFX11-NEXT: ; %bb.4: ; %if ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 @@ -1526,10 +1775,10 @@ define amdgpu_kernel void @mul32_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_mov_b32 s4, s2 ; GFX11-NEXT: s_mov_b32 s5, s3 ; GFX11-NEXT: buffer_load_b32 v0, off, s[4:7], 0 -; GFX11-NEXT: s_branch .LBB11_6 -; GFX11-NEXT: .LBB11_5: +; GFX11-NEXT: s_branch .LBB13_6 +; GFX11-NEXT: .LBB13_5: ; GFX11-NEXT: v_mov_b32_e32 v0, s5 -; GFX11-NEXT: .LBB11_6: ; %endif +; GFX11-NEXT: .LBB13_6: ; %endif ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 @@ -1601,7 +1850,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_cmp_ne_u64_e64 s[10:11], s[4:5], 0 ; SI-NEXT: s_and_b64 vcc, exec, s[10:11] -; SI-NEXT: s_cbranch_vccz .LBB12_4 +; SI-NEXT: s_cbranch_vccz .LBB14_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: v_mul_hi_u32 v0, s4, v0 @@ -1612,22 +1861,22 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; SI-NEXT: v_add_i32_e32 v1, vcc, 
s5, v0 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; SI-NEXT: s_cbranch_vccnz .LBB12_3 -; SI-NEXT: .LBB12_2: ; %if +; SI-NEXT: s_cbranch_vccnz .LBB14_3 +; SI-NEXT: .LBB14_2: ; %if ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; SI-NEXT: .LBB12_3: ; %endif +; SI-NEXT: .LBB14_3: ; %endif ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm -; SI-NEXT: .LBB12_4: +; SI-NEXT: .LBB14_4: ; SI-NEXT: ; implicit-def: $vgpr0_vgpr1 -; SI-NEXT: s_branch .LBB12_2 +; SI-NEXT: s_branch .LBB14_2 ; ; VI-LABEL: mul64_in_branch: ; VI: ; %bb.0: ; %entry @@ -1635,7 +1884,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; VI-NEXT: s_mov_b64 s[8:9], 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_lg_u64 s[4:5], 0 -; VI-NEXT: s_cbranch_scc0 .LBB12_4 +; VI-NEXT: s_cbranch_scc0 .LBB14_4 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mad_u64_u32 v[0:1], s[10:11], s4, v0, 0 @@ -1644,22 +1893,22 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; VI-NEXT: s_mul_i32 s4, s5, s6 ; VI-NEXT: v_add_u32_e32 v1, vcc, s4, v1 ; VI-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; VI-NEXT: s_cbranch_vccnz .LBB12_3 -; VI-NEXT: .LBB12_2: ; %if +; VI-NEXT: s_cbranch_vccnz .LBB14_3 +; VI-NEXT: .LBB14_2: ; %if ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_mov_b32 s4, s2 ; VI-NEXT: s_mov_b32 s5, s3 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; VI-NEXT: .LBB12_3: ; %endif +; VI-NEXT: .LBB14_3: ; %endif ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm -; VI-NEXT: .LBB12_4: +; VI-NEXT: .LBB14_4: ; VI-NEXT: ; 
implicit-def: $vgpr0_vgpr1 -; VI-NEXT: s_branch .LBB12_2 +; VI-NEXT: s_branch .LBB14_2 ; ; GFX9-LABEL: mul64_in_branch: ; GFX9: ; %bb.0: ; %entry @@ -1667,7 +1916,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_mov_b64 s[8:9], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX9-NEXT: s_cbranch_scc0 .LBB12_3 +; GFX9-NEXT: s_cbranch_scc0 .LBB14_3 ; GFX9-NEXT: ; %bb.1: ; %else ; GFX9-NEXT: s_mul_i32 s7, s4, s7 ; GFX9-NEXT: s_mul_hi_u32 s10, s4, s6 @@ -1676,21 +1925,21 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_add_i32 s5, s7, s5 ; GFX9-NEXT: s_mul_i32 s4, s4, s6 ; GFX9-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; GFX9-NEXT: s_cbranch_vccnz .LBB12_4 -; GFX9-NEXT: .LBB12_2: ; %if +; GFX9-NEXT: s_cbranch_vccnz .LBB14_4 +; GFX9-NEXT: .LBB14_2: ; %if ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_mov_b32 s4, s2 ; GFX9-NEXT: s_mov_b32 s5, s3 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; GFX9-NEXT: s_branch .LBB12_5 -; GFX9-NEXT: .LBB12_3: +; GFX9-NEXT: s_branch .LBB14_5 +; GFX9-NEXT: .LBB14_3: ; GFX9-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GFX9-NEXT: s_branch .LBB12_2 -; GFX9-NEXT: .LBB12_4: +; GFX9-NEXT: s_branch .LBB14_2 +; GFX9-NEXT: .LBB14_4: ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NEXT: .LBB12_5: ; %endif +; GFX9-NEXT: .LBB14_5: ; %endif ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -1702,7 +1951,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX10-NEXT: s_cbranch_scc0 .LBB12_3 +; GFX10-NEXT: s_cbranch_scc0 .LBB14_3 ; GFX10-NEXT: ; %bb.1: ; %else ; GFX10-NEXT: s_mul_i32 s7, s4, s7 ; GFX10-NEXT: s_mul_hi_u32 s8, s4, s6 @@ -1711,22 +1960,22 @@ define 
amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_mul_i32 s4, s4, s6 ; GFX10-NEXT: s_add_i32 s5, s7, s5 ; GFX10-NEXT: s_mov_b32 s6, 0 -; GFX10-NEXT: s_cbranch_execnz .LBB12_4 -; GFX10-NEXT: .LBB12_2: ; %if +; GFX10-NEXT: s_cbranch_execnz .LBB14_4 +; GFX10-NEXT: .LBB14_2: ; %if ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; GFX10-NEXT: s_mov_b32 s6, -1 ; GFX10-NEXT: s_mov_b32 s4, s2 ; GFX10-NEXT: s_mov_b32 s5, s3 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; GFX10-NEXT: s_branch .LBB12_5 -; GFX10-NEXT: .LBB12_3: +; GFX10-NEXT: s_branch .LBB14_5 +; GFX10-NEXT: .LBB14_3: ; GFX10-NEXT: s_mov_b32 s6, -1 ; GFX10-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GFX10-NEXT: s_branch .LBB12_2 -; GFX10-NEXT: .LBB12_4: +; GFX10-NEXT: s_branch .LBB14_2 +; GFX10-NEXT: .LBB14_4: ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-NEXT: .LBB12_5: ; %endif +; GFX10-NEXT: .LBB14_5: ; %endif ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; GFX10-NEXT: s_mov_b32 s2, -1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1738,7 +1987,7 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX11-NEXT: s_cbranch_scc0 .LBB12_3 +; GFX11-NEXT: s_cbranch_scc0 .LBB14_3 ; GFX11-NEXT: ; %bb.1: ; %else ; GFX11-NEXT: s_mul_i32 s7, s4, s7 ; GFX11-NEXT: s_mul_hi_u32 s8, s4, s6 @@ -1747,21 +1996,21 @@ define amdgpu_kernel void @mul64_in_branch(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_mul_i32 s4, s4, s6 ; GFX11-NEXT: s_add_i32 s5, s7, s5 ; GFX11-NEXT: s_mov_b32 s6, 0 -; GFX11-NEXT: s_cbranch_execnz .LBB12_4 -; GFX11-NEXT: .LBB12_2: ; %if +; GFX11-NEXT: s_cbranch_execnz .LBB14_4 +; GFX11-NEXT: .LBB14_2: ; %if ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_mov_b32 s4, s2 ; GFX11-NEXT: s_mov_b32 s5, s3 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[4:7], 0 -; 
GFX11-NEXT: s_branch .LBB12_5 -; GFX11-NEXT: .LBB12_3: +; GFX11-NEXT: s_branch .LBB14_5 +; GFX11-NEXT: .LBB14_3: ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GFX11-NEXT: s_branch .LBB12_2 -; GFX11-NEXT: .LBB12_4: +; GFX11-NEXT: s_branch .LBB14_2 +; GFX11-NEXT: .LBB14_4: ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 -; GFX11-NEXT: .LBB12_5: ; %endif +; GFX11-NEXT: .LBB14_5: ; %endif ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_waitcnt vmcnt(0) From 97217d188469c78d69b65059cabc123e847a2c66 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Sat, 14 Oct 2023 17:27:37 -0400 Subject: [PATCH 2/6] [mlir] Fix '-Wunused' warning. NFC --- mlir/lib/Target/LLVMIR/ModuleImport.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index d070e42ac0c7d..e3562049cd81c 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -991,7 +991,7 @@ FailureOr ModuleImport::convertConstant(llvm::Constant *constant) { } // Convert none token constants. 
- if (auto *noneToken = dyn_cast(constant)) { + if (isa(constant)) { return builder.create(loc).getResult(); } From e9c101a7533a829f48678589c7382d4c21c2eb1b Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 16 Oct 2023 17:08:12 +0200 Subject: [PATCH 3/6] [libc++] Add missing <__availability> include --- libcxx/include/sstream | 1 + 1 file changed, 1 insertion(+) diff --git a/libcxx/include/sstream b/libcxx/include/sstream index 47c2d0553a57c..7db5409871873 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -267,6 +267,7 @@ typedef basic_stringstream wstringstream; */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__availability> #include <__config> #include <__fwd/sstream.h> #include <__utility/swap.h> From 903faefc14eb838a20c0526a14d44dbb0fcea85b Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Mon, 16 Oct 2023 15:14:48 +0000 Subject: [PATCH 4/6] [Flang][OpenMP] Port three tests to HLFIR flow These are copies of tests from flang/test/Lower/OpenMP/FIR --- .../Lower/OpenMP/firstprivate-commonblock.f90 | 34 ++ flang/test/Lower/OpenMP/unstructured.f90 | 348 ++++++++++++++++++ flang/test/Lower/OpenMP/wsloop.f90 | 75 ++++ 3 files changed, 457 insertions(+) create mode 100644 flang/test/Lower/OpenMP/firstprivate-commonblock.f90 create mode 100644 flang/test/Lower/OpenMP/unstructured.f90 create mode 100644 flang/test/Lower/OpenMP/wsloop.f90 diff --git a/flang/test/Lower/OpenMP/firstprivate-commonblock.f90 b/flang/test/Lower/OpenMP/firstprivate-commonblock.f90 new file mode 100644 index 0000000000000..ff064a74d491a --- /dev/null +++ b/flang/test/Lower/OpenMP/firstprivate-commonblock.f90 @@ -0,0 +1,34 @@ +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK: func.func @_QPfirstprivate_common() { +!CHECK: %[[val_0:.*]] = fir.address_of(@c_) : !fir.ref> +!CHECK: %[[val_1:.*]] = fir.convert %[[val_0]] : (!fir.ref>) -> !fir.ref> +!CHECK: %[[val_c0:.*]] = arith.constant 0 : index +!CHECK: %[[val_2:.*]] = fir.coordinate_of %[[val_1]], %[[val_c0]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[val_3:.*]] = fir.convert %[[val_2]] : (!fir.ref) -> !fir.ref +!CHECK: %[[VAL_3_DECL:.*]]:2 = hlfir.declare %[[val_3]] {uniq_name = "_QFfirstprivate_commonEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[val_4:.*]] = fir.convert %[[val_0]] : (!fir.ref>) -> !fir.ref> +!CHECK: %[[val_c4:.*]] = arith.constant 4 : index +!CHECK: %[[val_5:.*]] = fir.coordinate_of %[[val_4]], %[[val_c4]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[val_6:.*]] = fir.convert %[[val_5]] : (!fir.ref) -> !fir.ref +!CHECK: %[[VAL_6_DECL:.*]]:2 = hlfir.declare %[[val_6]] {uniq_name = "_QFfirstprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.parallel { +!CHECK: %[[val_7:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFfirstprivate_commonEx"} +!CHECK: %[[VAL_7_DECL:.*]]:2 = hlfir.declare %[[val_7]] {uniq_name = "_QFfirstprivate_commonEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[val_8:.*]] = fir.load %[[VAL_3_DECL]]#1 : !fir.ref +!CHECK: fir.store %[[val_8]] to %[[VAL_7_DECL]]#1 : !fir.ref +!CHECK: %[[val_9:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFfirstprivate_commonEy"} +!CHECK: %[[VAL_9_DECL:.*]]:2 = hlfir.declare %[[val_9]] {uniq_name = "_QFfirstprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[val_10:.*]] = fir.load %[[VAL_6_DECL]]#1 : !fir.ref +!CHECK: fir.store %[[val_10]] to %[[VAL_9_DECL]]#1 : !fir.ref +!CHECK: omp.terminator +!CHECK: } +!CHECK: return +!CHECK: } + +subroutine firstprivate_common + common /c/ x, y + real x, y + !$omp parallel firstprivate(/c/) + !$omp end parallel +end subroutine 
diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90 new file mode 100644 index 0000000000000..e5bf980ce90fd --- /dev/null +++ b/flang/test/Lower/OpenMP/unstructured.f90 @@ -0,0 +1,348 @@ +! Test unstructured code adjacent to and inside OpenMP constructs. + +! RUN: bbc %s -fopenmp -emit-hlfir -o "-" | FileCheck %s + +! CHECK-LABEL: func @_QPss1{{.*}} { +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb4 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb5 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb3, ^bb4 +! CHECK: ^bb4: // pred: ^bb2 +! CHECK: fir.call @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb5: // 2 preds: ^bb1, ^bb3 +! CHECK: omp.master { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: omp.terminator +! CHECK: } +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: } +subroutine ss1(n) ! unstructured code followed by a structured OpenMP construct + do i = 1, 3 + if (i .eq. n) exit + print*, 'ss1-A', i + enddo + !$omp master + print*, 'ss1-B', i + !$omp end master + print* +end + +! CHECK-LABEL: func @_QPss2{{.*}} { +! CHECK: omp.master { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb4 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb5 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb3, ^bb4 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb5: // 2 preds: ^bb1, ^bb3 +! CHECK: omp.terminator +! CHECK: } +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: } +subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct + !$omp master + print*, 'ss2-A', n + do i = 1, 3 + if (i .eq. n) exit + print*, 'ss2-B', i + enddo + !$omp end master + print*, 'ss2-C', i + print* +end + +! CHECK-LABEL: func @_QPss3{{.*}} { +! CHECK: omp.parallel { +! 
CHECK: %[[ALLOCA_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned} +! CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_K]] {uniq_name = "_QFss3Ek"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {{{.*}}, pinned} +! CHECK: %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFss3Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {{{.*}}, pinned} +! CHECK: %[[OMP_LOOP_K_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFss3Ek"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref +! CHECK: br ^bb1 +! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 +! CHECK: cond_br %{{[0-9]*}}, ^bb3, ^bb6 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: cond_br %{{[0-9]*}}, ^bb4, ^bb5 +! CHECK: ^bb4: // pred: ^bb3 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: %[[LOAD_2:.*]] = fir.load %[[K_DECL]]#0 : !fir.ref +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_2]]) +! CHECK: br ^bb2 +! CHECK: ^bb6: // 2 preds: ^bb2, ^bb4 +! CHECK: omp.yield +! CHECK: } +! CHECK: br ^bb1 +! CHECK: ^bb4: // pred: ^bb1 +! CHECK: omp.terminator +! CHECK: } +! CHECK: } +subroutine ss3(n) ! nested unstructured OpenMP constructs + !$omp parallel + do i = 1, 3 + !$omp do + do k = 1, 3 + print*, 'ss3-A', k + enddo + !$omp end do + !$omp do + do j = 1, 3 + do k = 1, 3 + if (k .eq. 
n) exit + print*, 'ss3-B', k + enddo + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss4{{.*}} { +! CHECK: omp.parallel { +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} +! CHECK: %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref +! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} +! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} +! CHECK: fir.if %[[COND_XOR]] { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: %[[LOAD:.*]] = fir.load %[[OMP_LOOP_J_DECL]]#0 : !fir.ref +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]]) +! CHECK: } else { +! CHECK: } +! CHECK-NEXT: omp.yield +! CHECK-NEXT: } +! CHECK: omp.terminator +! CHECK-NEXT:} +subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs + !$omp parallel + do i = 1, 3 + !$omp do + do j = 1, 3 + if (j .eq. n) cycle + print*, 'ss4', j + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss5() { +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: br ^[[BB2:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]] +! CHECK: ^[[BB3]]: +! CHECK: cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB3:.*]] +! CHECK: ^[[BB4]]: +! CHECK: br ^[[BB6]] +! CHECK: ^[[BB3]]: +! CHECK: br ^[[BB2]] +! CHECK: ^[[BB6]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +subroutine ss5() ! EXIT inside OpenMP wsloop (inside parallel) + integer :: x + !$omp parallel private(x) + !$omp do + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + x = j - 222 + enddo + !$omp end do + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss6() { +! CHECK: omp.parallel { +! CHECK: br ^[[BB1_OUTER:.*]] +! CHECK: ^[[BB1_OUTER]]: +! 
CHECK: cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]] +! CHECK: ^[[BB2_OUTER]]: +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: br ^[[BB2:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]] +! CHECK: ^[[BB3]]: +! CHECK: cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]] +! CHECK: ^[[BB4]]: +! CHECK: br ^[[BB6]] +! CHECK: ^[[BB5]] +! CHECK: br ^[[BB2]] +! CHECK: ^[[BB6]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: br ^[[BB1_OUTER]] +! CHECK: ^[[BB3_OUTER]]: +! CHECK: omp.terminator +! CHECK: } +subroutine ss6() ! EXIT inside OpenMP wsloop in a do loop (inside parallel) + integer :: x + !$omp parallel private(x) + do i = 1, 3 + !$omp do + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + x = j - 222 + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QPss7() { +! CHECK: br ^[[BB1_OUTER:.*]] +! CHECK: ^[[BB1_OUTER]]: +! CHECK: cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]] +! CHECK-NEXT: ^[[BB2_OUTER:.*]]: +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! CHECK-NEXT: ^[[BB1]]: +! CHECK: br ^[[BB2:.*]] +! CHECK-NEXT: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]] +! CHECK-NEXT: ^[[BB3]]: +! CHECK: cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]] +! CHECK-NEXT: ^[[BB4]]: +! CHECK: br ^[[BB6]] +! CHECK-NEXT: ^[[BB5]]: +! CHECK: br ^[[BB2]] +! CHECK-NEXT: ^[[BB6]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: br ^[[BB1_OUTER]] +! CHECK-NEXT: ^[[BB3_OUTER]]: +! CHECK-NEXT: return +subroutine ss7() ! EXIT inside OpenMP parallel do (inside do loop) + integer :: x + do i = 1, 3 + !$omp parallel do private(x) + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + enddo + !$omp end parallel do + enddo +end + +! CHECK-LABEL: func @_QPss8() { +! CHECK: omp.parallel { +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^[[BB1:.*]] +! 
CHECK-NEXT: ^[[BB1]]: +! CHECK: br ^[[BB2:.*]] +! CHECK: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]] +! CHECK: ^[[BB3]]: +! CHECK: cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]] +! CHECK: ^[[BB4]]: +! CHECK-NEXT: br ^[[BB6]] +! CHECK: ^[[BB5]]: +! CHECK: br ^[[BB2]] +! CHECK-NEXT: ^[[BB6]]: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +subroutine ss8() ! EXIT inside OpenMP parallel do + integer :: x + !$omp parallel do private(x) + do j = 1, 3 + x = j * i + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + enddo + enddo + !$omp end parallel do +end + +! CHECK-LABEL: func @_QPss9() { +! CHECK: omp.parallel { +! CHECK-NEXT: omp.parallel { +! CHECK: br ^[[BB1:.*]] +! CHECK: ^[[BB1]]: +! CHECK: cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB5:.*]] +! CHECK-NEXT: ^[[BB2]]: +! CHECK: cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB4:.*]] +! CHECK-NEXT: ^[[BB3]]: +! CHECK-NEXT: br ^[[BB5]] +! CHECK-NEXT: ^[[BB4]]: +! CHECK: br ^[[BB1]] +! CHECK-NEXT: ^[[BB5]]: +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK: omp.terminator +! CHECK-NEXT } +! CHECK: } +subroutine ss9() ! EXIT inside OpenMP parallel (inside parallel) + integer :: x + !$omp parallel + !$omp parallel private(x) + do k = 1, 3 + if (k .eq. n) exit + x = k + x = x + k + end do + !$omp end parallel + !$omp end parallel +end + +! CHECK-LABEL: func @_QQmain +program p + call ss1(2) + call ss2(2) + call ss3(2) + call ss4(2) + call ss5() + call ss6() + call ss7() + call ss8() + call ss9() +end diff --git a/flang/test/Lower/OpenMP/wsloop.f90 b/flang/test/Lower/OpenMP/wsloop.f90 new file mode 100644 index 0000000000000..4068f715c3e18 --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop.f90 @@ -0,0 +1,75 @@ +! This test checks lowering of OpenMP DO Directive (Worksharing). + +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s + +!CHECK-LABEL: func @_QPsimple_loop() +subroutine simple_loop + integer :: i + ! CHECK: omp.parallel + !$OMP PARALLEL + ! 
CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} + ! CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP DO + do i=1, 9 + ! CHECK: fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref + ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + !$OMP END DO + ! CHECK: omp.terminator + !$OMP END PARALLEL +end subroutine + +!CHECK-LABEL: func @_QPsimple_loop_with_step() +subroutine simple_loop_with_step + integer :: i + ! CHECK: omp.parallel + !$OMP PARALLEL + ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} + ! CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_with_stepEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 + ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref + ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref + !$OMP DO + do i=1, 9, 2 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + !$OMP END DO + ! CHECK: omp.terminator + !$OMP END PARALLEL +end subroutine + +!CHECK-LABEL: func @_QPloop_with_schedule_nowait() +subroutine loop_with_schedule_nowait + integer :: i + ! CHECK: omp.parallel + !$OMP PARALLEL + ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} + ! 
CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFloop_with_schedule_nowaitEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 + ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + !$OMP DO SCHEDULE(runtime) + do i=1, 9 + ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref + ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + print*, i + end do + ! CHECK: omp.yield + !$OMP END DO NOWAIT + ! CHECK: omp.terminator + !$OMP END PARALLEL +end subroutine From 144c5b6d58803a2d4a0fe92a0fe331ff0347dc3b Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 16 Oct 2023 15:25:44 +0000 Subject: [PATCH 5/6] [compiler-rt][hwasan] Disable deep-recursion.c test on AArch64 Linux The test program occasionally fails to detect the fault as it should. See https://github.com/llvm/llvm-project/issues/69221. --- compiler-rt/test/hwasan/TestCases/deep-recursion.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/test/hwasan/TestCases/deep-recursion.c b/compiler-rt/test/hwasan/TestCases/deep-recursion.c index fde8a0db5ad15..39902d072a0d3 100644 --- a/compiler-rt/test/hwasan/TestCases/deep-recursion.c +++ b/compiler-rt/test/hwasan/TestCases/deep-recursion.c @@ -17,6 +17,9 @@ // Stack histories are currently not recorded on x86. // XFAIL: target=x86_64{{.*}} +// Flaky on AArch64 Linux, see https://github.com/llvm/llvm-project/issues/69221. +// UNSUPPORTED: target=aarch64-linux{{.*}} + #include // At least -O1 is needed for this function to not have a stack frame on // AArch64. 
From 6ade5183232dc1398205d7c9dbe21243b2560837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 16 Oct 2023 08:52:02 -0700 Subject: [PATCH 6/6] [flang][openacc][NFC] Issue better error message when directive is wrong (#69034) --- flang/lib/Parser/openacc-parsers.cpp | 32 ++++++++++++---------- flang/test/Semantics/OpenACC/acc-error.f90 | 15 ++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 flang/test/Semantics/OpenACC/acc-error.f90 diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp index 09b30e679de0e..75aeffd29f92f 100644 --- a/flang/lib/Parser/openacc-parsers.cpp +++ b/flang/lib/Parser/openacc-parsers.cpp @@ -150,11 +150,12 @@ TYPE_PARSER(sourced(construct( TYPE_PARSER(construct( sourced(Parser{}), Parser{})) -TYPE_PARSER(construct(startAccLine >> "END LOOP"_tok)) +TYPE_PARSER(construct("END LOOP"_tok)) TYPE_PARSER(construct( sourced(Parser{} / endAccLine), - maybe(Parser{}), maybe(Parser{} / endAccLine))) + maybe(Parser{}), + maybe(startAccLine >> Parser{} / endAccLine))) // 2.15.1 Routine directive TYPE_PARSER(sourced(construct(verbatim("ROUTINE"_tok), @@ -227,22 +228,25 @@ TYPE_PARSER(construct( TYPE_PARSER(construct( sourced(Parser{}), Parser{})) -TYPE_PARSER( - startAccLine >> first(sourced(construct( - Parser{})), - sourced(construct( - Parser{})))) +TYPE_PARSER(startAccLine >> + withMessage("expected OpenACC directive"_err_en_US, + first(sourced(construct( + Parser{})), + sourced(construct( + Parser{}))))) // OpenACC constructs TYPE_CONTEXT_PARSER("OpenACC construct"_en_US, startAccLine >> - first(construct(Parser{}), - construct(Parser{}), - construct(Parser{}), - construct(Parser{}), - construct(Parser{}), - construct(Parser{}), - construct(Parser{}))) + withMessage("expected OpenACC directive"_err_en_US, + first(construct(Parser{}), + 
construct(Parser{}), + construct(Parser{}), + construct( + Parser{}), + construct(Parser{}), + construct(Parser{}), + construct(Parser{})))) TYPE_PARSER(startAccLine >> sourced(construct(sourced("END"_tok >> diff --git a/flang/test/Semantics/OpenACC/acc-error.f90 b/flang/test/Semantics/OpenACC/acc-error.f90 new file mode 100644 index 0000000000000..b1c3b77847429 --- /dev/null +++ b/flang/test/Semantics/OpenACC/acc-error.f90 @@ -0,0 +1,15 @@ +! RUN: %python %S/../test_errors.py %s %flang -fopenacc + +! Check parser specific error for OpenACC + + +subroutine test(a, n) + integer :: a(n) + !ERROR: expected OpenACC directive + !$acc p + integer :: i,j + + i = 0 + !ERROR: expected OpenACC directive + !$acc p + end subroutine